diff options
author | Pedro Alvarez Piedehierro <pedro@alvarezpiedehierro.com> | 2021-12-15 15:39:15 +0000 |
---|---|---|
committer | Pedro Alvarez Piedehierro <pedro@alvarezpiedehierro.com> | 2021-12-15 15:39:15 +0000 |
commit | c254ed305772b3173e97d7dea6e2da03bf9ec75d (patch) | |
tree | ac0052051016c30d9a75c737ebefb0f64a2bf028 | |
parent | 51accdb426463e3669d729f7959714068d2b9af9 (diff) | |
parent | 41cdafd32b50704c26e21542932f53ce2eaae01a (diff) | |
download | lorry-c254ed305772b3173e97d7dea6e2da03bf9ec75d.tar.gz |
Merge branch 'ellisb/setuseragent' into 'master'
Spoof user-agent to look like browser
See merge request CodethinkLabs/lorry/lorry!27
-rwxr-xr-x | lorry | 23 |
1 files changed, 19 insertions, 4 deletions
```diff
@@ -33,6 +33,7 @@ import ftplib
 import re
 import subprocess
 import tempfile
+import contextlib
 
 import yaml
 
@@ -809,7 +810,7 @@ class Lorry(cliapp.Application):
                 self.progress('.. attempting to fetch %s' % basename)
                 try:
                     with open(file_dest, 'wb') as raw_file, \
-                         urllib.request.urlopen(url) as urlfile:
+                         self.urlopen(url) as urlfile:
                         raw_file.write(urlfile.read())
                         try:
                             # HTTP dates use (one of) the email date formats
@@ -857,8 +858,8 @@ class Lorry(cliapp.Application):
         if file_missing_or_empty(archive_dest):
             self.progress('.. attempting to fetch.')
             try:
-                with open(archive_dest, 'wb') as archive_file:
-                    urlfile = urllib.request.urlopen(spec['url'])
+                with open(archive_dest, 'wb') as archive_file, \
+                     self.urlopen(spec['url']) as urlfile:
                     archive_file.write(urlfile.read())
                     try:
                         # HTTP dates use (one of) the email date formats
@@ -867,7 +868,6 @@ class Lorry(cliapp.Application):
                                 urlfile.info()['Last-Modified']))
                     except (KeyError, ValueError, TypeError):
                         url_date = None
-                    urlfile.close()
                 if url_date:
                     os.utime(archive_dest, (url_date, url_date))
             except Exception:
@@ -984,6 +984,21 @@ class Lorry(cliapp.Application):
             os.unlink(marks_temp_name)
             raise
 
+    @contextlib.contextmanager
+    def urlopen(self, url):
+        try:
+            req = urllib.request.Request(url)
+            with urllib.request.urlopen(req) as urlfile:
+                yield urlfile
+        except urllib.error.HTTPError as e:
+            if e.getcode() == 403:
+                newreq = urllib.request.Request(url)
+                newreq.add_header('User-Agent',
+                    'Lorry/%s (https://gitlab.com/CodethinkLabs/lorry/lorry)' % __version__)
+                with urllib.request.urlopen(newreq) as newurlfile:
+                    yield newurlfile
+            else:
+                raise
 
 if __name__ == '__main__':
     Lorry(version=__version__).run()
```