From 076e4a523167f550b56da9d540fcee7fcbcc290b Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Fri, 10 Jul 2020 21:48:38 +0100
Subject: lorry: Copy modification time from server to downloaded files

lorry.gzip-importer uses the given file's modification time (mtime) as
the commit time, but this is currently unrelated to the mtime on the
server and so is unreproducible.

Copy the Last-Modified field of the urllib response header, if
present, to the mtime (and atime) of a file after downloading it. For
FTP downloads, this response field is synthesised by our FTP handler.

Closes #4.
---
 lorry | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'lorry')

diff --git a/lorry b/lorry
index 4f7f574..eba2ef0 100755
--- a/lorry
+++ b/lorry
@@ -28,6 +28,7 @@ import traceback
 import functools
 import stat
 import email.message
+import email.utils
 import ftplib
 import re
 
@@ -639,7 +640,16 @@ class Lorry(cliapp.Application):
                 with open(archive_dest, 'wb') as archive_file:
                     urlfile = urllib.request.urlopen(spec['url'])
                     archive_file.write(urlfile.read())
+                    try:
+                        # HTTP dates use (one of) the email date formats
+                        url_date = email.utils.mktime_tz(
+                            email.utils.parsedate_tz(
+                                urlfile.info()['Last-Modified']))
+                    except (KeyError, ValueError, TypeError):
+                        url_date = None
                     urlfile.close()
+                if url_date:
+                    os.utime(archive_dest, (url_date, url_date))
             except Exception:
                 if os.path.exists(archive_dest):
                     os.unlink(archive_dest)
-- 
cgit v1.2.1