diff options
author | Ben Hutchings <ben.hutchings@codethink.co.uk> | 2020-07-10 21:48:38 +0100 |
---|---|---|
committer | Ben Hutchings <ben.hutchings@codethink.co.uk> | 2020-07-15 00:35:34 +0100 |
commit | 076e4a523167f550b56da9d540fcee7fcbcc290b (patch) | |
tree | 4971fa25943fe489aae1729020d91dda4f2a231d /lorry | |
parent | 9662091853cbb3e7c9d9962afce53e94aa238924 (diff) | |
download | lorry-076e4a523167f550b56da9d540fcee7fcbcc290b.tar.gz |
lorry: Copy modification time from server to downloaded files
lorry.gzip-importer uses the given file's modification time (mtime) as
the commit time, but this is currently unrelated to the mtime on the
server and so is unreproducible.
Copy the Last-Modified field of the urllib response header, if
present, to the mtime (and atime) of a file after downloading it. For
FTP downloads, this response field is synthesised by our FTP handler.
Closes #4.
Diffstat (limited to 'lorry')
-rwxr-xr-x | lorry | 10 |
1 files changed, 10 insertions, 0 deletions
@@ -28,6 +28,7 @@ import traceback
 import functools
 import stat
 import email.message
+import email.utils
 import ftplib
 import re
 
@@ -639,7 +640,16 @@ class Lorry(cliapp.Application):
                 with open(archive_dest, 'wb') as archive_file:
                     urlfile = urllib.request.urlopen(spec['url'])
                     archive_file.write(urlfile.read())
+                    try:
+                        # HTTP dates use (one of) the email date formats
+                        url_date = email.utils.mktime_tz(
+                            email.utils.parsedate_tz(
+                                urlfile.info()['Last-Modified']))
+                    except (KeyError, ValueError, TypeError):
+                        url_date = None
                     urlfile.close()
+                if url_date:
+                    os.utime(archive_dest, (url_date, url_date))
             except Exception:
                 if os.path.exists(archive_dest):
                     os.unlink(archive_dest)