diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2022-10-24 17:19:14 +0200 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2022-10-24 17:19:26 +0200 |
commit | 77413c28e237f3f4bf2363150aaab9d2f7705f7b (patch) | |
tree | 550d2f38f81ea48cb276a1403971ca84c976f58e /buildlibxml.py | |
parent | 0b10088e4a358d5153defee68c1f7dfd2c06f18b (diff) | |
download | python-lxml-77413c28e237f3f4bf2363150aaab9d2f7705f7b.tar.gz |
Resolve build failures on appveyor/Windows due to incomplete (abbreviated) download lists of github releases.
Closes https://bugs.launchpad.net/lxml/+bug/1993962
Diffstat (limited to 'buildlibxml.py')
-rw-r--r-- | buildlibxml.py | 62 |
1 files changed, 36 insertions, 26 deletions
diff --git a/buildlibxml.py b/buildlibxml.py index 5a7a4c64..8a7504eb 100644 --- a/buildlibxml.py +++ b/buildlibxml.py @@ -1,3 +1,4 @@ +import json import os, re, sys, subprocess, platform import tarfile from distutils import log @@ -5,11 +6,12 @@ from contextlib import closing, contextmanager from ftplib import FTP try: - from urlparse import urljoin, unquote, urlparse - from urllib import urlretrieve, urlopen, urlcleanup -except ImportError: from urllib.parse import urljoin, unquote, urlparse - from urllib.request import urlretrieve, urlopen, urlcleanup + from urllib.request import urlretrieve, urlopen, urlcleanup, Request +except ImportError: # Py2 + from urlparse import urljoin, unquote, urlparse + from urllib import urlretrieve, urlcleanup + from urllib2 import urlopen, Request multi_make_options = [] try: @@ -30,17 +32,16 @@ sys_platform = sys.platform # use pre-built libraries on Windows def download_and_extract_windows_binaries(destdir): - url = "https://github.com/lxml/libxml2-win-binaries/releases" - filenames = list(_list_dir_urllib(url)) - - release_path = "/download/%s/" % find_max_version( - "library release", filenames, re.compile(r"/releases/tag/([0-9.]+[0-9])$")) - url += release_path - filenames = [ - filename.rsplit('/', 1)[1] - for filename in filenames - if release_path in filename - ] + url = "https://api.github.com/repos/lxml/libxml2-win-binaries/releases" + releases, _ = read_url(url, accept="application/vnd.github+json", as_json=True) + + max_release = {'tag_name': ''} + for release in releases: + if max_release['tag_name'] < release.get('tag_name', ''): + max_release = release + + url = "https://github.com/lxml/libxml2-win-binaries/releases/download/%s/" % max_release['tag_name'] + filenames = [asset['name'] for asset in max_release.get('assets', ())] # Check for native ARM64 build or the environment variable that is set by # Visual Studio for cross-compilation (same variable as setuptools uses) @@ -168,13 +169,26 @@ def _list_dir_ftplib(url): return parse_text_ftplist("\n".join(data)) -def _list_dir_urllib(url): - with closing(urlopen(url)) as res: +def read_url(url, decode=True, accept=None, as_json=False): + if accept: + request = Request(url, headers={'Accept': accept}) + else: + request = Request(url) + + with closing(urlopen(request)) as res: charset = _find_content_encoding(res) content_type = res.headers.get('Content-Type') data = res.read() - data = data.decode(charset) + if decode: + data = data.decode(charset) + if as_json: + data = json.loads(data) + return data, content_type + + +def _list_dir_urllib(url): + data, content_type = read_url(url) if content_type and content_type.startswith('text/html'): files = parse_html_filelist(data) else: @@ -183,13 +197,11 @@ def _list_dir_urllib(url): def http_find_latest_version_directory(url, version=None): - with closing(urlopen(url)) as res: - charset = _find_content_encoding(res) - data = res.read() + data, _ = read_url(url) # e.g. <a href="1.0/"> directories = [ (int(v[0]), int(v[1])) - for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data.decode(charset)) + for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data) ] if not directories: return url @@ -204,10 +216,8 @@ def http_find_latest_version_directory(url, version=None): def http_listfiles(url, re_pattern): - with closing(urlopen(url)) as res: - charset = _find_content_encoding(res) - data = res.read() - files = re.findall(re_pattern, data.decode(charset)) + data, _ = read_url(url) + files = re.findall(re_pattern, data) return files |