# lorry: Get modification time for FTP downloads
#
# lorry.gzip-importer uses the downloaded file's modification time
# (mtime) as the commit time.  For FTP downloads, urllib's default
# handler does not provide an mtime, so this module-level handler asks
# the server for it using the MDTM extension.  Lorry activates it with:
#
#     urllib.request.install_opener(
#         urllib.request.build_opener(SimpleFTPHandler))
import email.message
import email.utils
import ftplib
import re
import urllib.error
import urllib.parse
import urllib.request
import urllib.response
from datetime import datetime, timezone


# This is a simplified replacement for urllib.request.FTPHandler, with
# one additional feature: it uses the MDTM extension specified in RFC
# 3659, and sets the Last-Modified header based on the result.
class SimpleFTPHandler(urllib.request.BaseHandler):
    """urllib handler for anonymous, binary-mode FTP downloads.

    Only GET requests for anonymous FTP URLs without parameters are
    supported; anything else is rejected with URLError.  The response
    carries a synthesised Content-Length header (when the server
    reports a size) and a Last-Modified header (when the server
    answers MDTM with a valid timestamp).
    """

    # Priority needs to be higher (numerically lower) than the
    # standard FTPHandler
    handler_order = urllib.request.FTPHandler.handler_order - 1

    # Format is YYYYMMDDhhmmss with optional fractional seconds (which
    # we ignore).  The implicit time zone is UTC.
    _mdtm_response_re = re.compile(r'^213 (\d{14})(?:\.\d+)?$')

    @classmethod
    def _parse_mdtm_response(cls, response):
        """Return the UTC datetime in an MDTM reply, or None if invalid."""
        match = cls._mdtm_response_re.match(response)
        if not match:
            return None
        digits = match.group(1)
        fields = (digits[0:4], digits[4:6], digits[6:8],
                  digits[8:10], digits[10:12], digits[12:14])
        try:
            return datetime(*map(int, fields), tzinfo=timezone.utc)
        except ValueError:
            # Right shape but not a real date/time (e.g. month 13)
            return None

    @staticmethod
    def _split_path(url_path):
        """Split a URL path into unquoted, non-empty components.

        Raises URLError if a component contains a CR or LF (which
        would otherwise be injected into the FTP command stream) or
        if the path names no file at all.
        """
        parts = [urllib.parse.unquote(part)
                 for part in url_path.split('/') if part != '']
        if any('\r' in part or '\n' in part for part in parts):
            raise urllib.error.URLError(
                'SimpleFTPHandler: illegal characters in path')
        if not parts:
            raise urllib.error.URLError(
                'SimpleFTPHandler: no file name in path')
        return parts

    def _query_mtime(self, ftp, file_name):
        """Best-effort MDTM query; return a UTC datetime or None."""
        try:
            reply = ftp.sendcmd('MDTM ' + file_name)
        except (ftplib.error_reply, ftplib.error_temp, ftplib.error_perm):
            # sendcmd() reports 4xx/5xx replies as error_temp/error_perm;
            # a server that lacks MDTM must not make the download fail.
            return None
        return self._parse_mdtm_response(reply)

    def ftp_open(self, req):
        """Open the FTP URL in *req* and return a response object.

        The returned object wraps the data connection; closing it also
        closes the control connection.

        Raises URLError for unsupported requests (non-GET method,
        credentials, parameters, missing host or file name, CR/LF in
        the path) and for any ftplib/socket error during the transfer
        set-up.
        """
        URLError = urllib.error.URLError

        if getattr(req, 'method', None) not in [None, 'GET']:
            raise URLError('SimpleFTPHandler: only supports GET method')
        url_parts = urllib.parse.urlparse(req.full_url)
        if url_parts.username or url_parts.password:
            raise URLError('SimpleFTPHandler: only supports anonymous FTP')
        if ';' in url_parts.path or url_parts.params or url_parts.query:
            raise URLError('SimpleFTPHandler: does not support parameters')
        if not url_parts.hostname:
            raise URLError('SimpleFTPHandler: no host given')

        # Validation happens before the try block on purpose: URLError
        # is an OSError and would otherwise be re-wrapped below.
        *dir_parts, file_name = self._split_path(url_parts.path)

        ftp = ftplib.FTP()
        data_file = None
        response = None
        try:
            ftp.connect(url_parts.hostname, url_parts.port or 21)
            ftp.login()
            for part in dir_parts:
                ftp.cwd(part)

            # Try to get the mtime from the server, ignoring error
            # or invalid responses
            mtime = self._query_mtime(ftp, file_name)

            # Start binary mode transfer
            ftp.voidcmd('TYPE I')
            data_sock, size = ftp.ntransfercmd('RETR ' + file_name)
            try:
                data_file = data_sock.makefile('rb')
            finally:
                # The file object keeps the connection alive; dropping
                # our own reference lets the socket really close as
                # soon as the file object is closed.
                data_sock.close()

            # Synthesise an HTTP-like response header
            header = email.message.EmailMessage()
            if size is not None:
                header['Content-Length'] = str(size)
            if mtime is not None:
                # Locale-independent RFC 7231 date, always in GMT
                header['Last-Modified'] = \
                    email.utils.format_datetime(mtime, usegmt=True)

            # Wrap up the file with a close hook to close the
            # control socket as well, and the extra metadata
            # expected in a response object.  The hook is bound to
            # this request's own connection, so several responses can
            # be open at once.
            response = urllib.response.addinfourl(
                urllib.response.addclosehook(data_file, ftp.close),
                header, req.full_url)
            return response

        except ftplib.all_errors as e:
            # Re-raise as URLError
            raise URLError('SimpleFTPHandler: %r' % e) from e

        finally:
            # Close data and control sockets on error
            if response is None:
                if data_file is not None:
                    data_file.close()
                ftp.close()