author    Stefan Behnel <stefan_ml@behnel.de>  2022-10-24 17:19:14 +0200
committer Stefan Behnel <stefan_ml@behnel.de>  2022-10-24 17:19:26 +0200
commit    77413c28e237f3f4bf2363150aaab9d2f7705f7b (patch)
tree      550d2f38f81ea48cb276a1403971ca84c976f58e /buildlibxml.py
parent    0b10088e4a358d5153defee68c1f7dfd2c06f18b (diff)
download  python-lxml-77413c28e237f3f4bf2363150aaab9d2f7705f7b.tar.gz
Resolve build failures on AppVeyor/Windows due to incomplete (abbreviated) download lists of GitHub releases.
Closes https://bugs.launchpad.net/lxml/+bug/1993962
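
The fix in brief: the HTML releases pages on github.com abbreviate long download lists, so scraping them missed some of the pre-built binaries, while the GitHub REST API returns every asset of a release. A minimal standalone sketch of the API-based approach (the latest_release_assets helper is illustrative, not part of the patch):

import json
from urllib.request import Request, urlopen

def latest_release_assets(repo):
    # Query the REST API instead of scraping the HTML page: the API
    # response lists all assets, the web page abbreviates long lists.
    url = "https://api.github.com/repos/%s/releases" % repo
    request = Request(url, headers={'Accept': 'application/vnd.github+json'})
    with urlopen(request) as response:
        releases = json.loads(response.read().decode('utf-8'))
    # Pick the highest tag by plain string comparison, as the patch itself does.
    latest = max(releases, key=lambda release: release.get('tag_name', ''))
    return latest['tag_name'], [asset['name'] for asset in latest.get('assets', ())]

tag, filenames = latest_release_assets("lxml/libxml2-win-binaries")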
Diffstat (limited to 'buildlibxml.py')
-rw-r--r--  buildlibxml.py | 62
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/buildlibxml.py b/buildlibxml.py
index 5a7a4c64..8a7504eb 100644
--- a/buildlibxml.py
+++ b/buildlibxml.py
@@ -1,3 +1,4 @@
+import json
 import os, re, sys, subprocess, platform
 import tarfile
 from distutils import log
@@ -5,11 +6,12 @@ from contextlib import closing, contextmanager
 from ftplib import FTP
 
 try:
-    from urlparse import urljoin, unquote, urlparse
-    from urllib import urlretrieve, urlopen, urlcleanup
-except ImportError:
     from urllib.parse import urljoin, unquote, urlparse
-    from urllib.request import urlretrieve, urlopen, urlcleanup
+    from urllib.request import urlretrieve, urlopen, urlcleanup, Request
+except ImportError:  # Py2
+    from urlparse import urljoin, unquote, urlparse
+    from urllib import urlretrieve, urlcleanup
+    from urllib2 import urlopen, Request
 
 multi_make_options = []
 try:
@@ -30,17 +32,16 @@ sys_platform = sys.platform
 
 # use pre-built libraries on Windows
 def download_and_extract_windows_binaries(destdir):
-    url = "https://github.com/lxml/libxml2-win-binaries/releases"
-    filenames = list(_list_dir_urllib(url))
-
-    release_path = "/download/%s/" % find_max_version(
-        "library release", filenames, re.compile(r"/releases/tag/([0-9.]+[0-9])$"))
-    url += release_path
-    filenames = [
-        filename.rsplit('/', 1)[1]
-        for filename in filenames
-        if release_path in filename
-    ]
+    url = "https://api.github.com/repos/lxml/libxml2-win-binaries/releases"
+    releases, _ = read_url(url, accept="application/vnd.github+json", as_json=True)
+
+    max_release = {'tag_name': ''}
+    for release in releases:
+        if max_release['tag_name'] < release.get('tag_name', ''):
+            max_release = release
+
+    url = "https://github.com/lxml/libxml2-win-binaries/releases/download/%s/" % max_release['tag_name']
+    filenames = [asset['name'] for asset in max_release.get('assets', ())]
 
     # Check for native ARM64 build or the environment variable that is set by
     # Visual Studio for cross-compilation (same variable as setuptools uses)
@@ -168,13 +169,26 @@ def _list_dir_ftplib(url):
     return parse_text_ftplist("\n".join(data))
 
 
-def _list_dir_urllib(url):
-    with closing(urlopen(url)) as res:
+def read_url(url, decode=True, accept=None, as_json=False):
+    if accept:
+        request = Request(url, headers={'Accept': accept})
+    else:
+        request = Request(url)
+
+    with closing(urlopen(request)) as res:
         charset = _find_content_encoding(res)
         content_type = res.headers.get('Content-Type')
         data = res.read()
-        data = data.decode(charset)
+
+    if decode:
+        data = data.decode(charset)
+    if as_json:
+        data = json.loads(data)
+    return data, content_type
+
+
+def _list_dir_urllib(url):
+    data, content_type = read_url(url)
     if content_type and content_type.startswith('text/html'):
         files = parse_html_filelist(data)
     else:
@@ -183,13 +197,11 @@ def _list_dir_urllib(url):
 
 
 def http_find_latest_version_directory(url, version=None):
-    with closing(urlopen(url)) as res:
-        charset = _find_content_encoding(res)
-        data = res.read()
+    data, _ = read_url(url)
     # e.g. <a href="1.0/">
     directories = [
         (int(v[0]), int(v[1]))
-        for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data.decode(charset))
+        for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data)
     ]
     if not directories:
         return url
@@ -204,10 +216,8 @@ def http_find_latest_version_directory(url, version=None):
 
 
 def http_listfiles(url, re_pattern):
-    with closing(urlopen(url)) as res:
-        charset = _find_content_encoding(res)
-        data = res.read()
-    files = re.findall(re_pattern, data.decode(charset))
+    data, _ = read_url(url)
+    files = re.findall(re_pattern, data)
     return files
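
For reference, every HTTP fetch in this file now goes through the new read_url() helper; a short usage sketch under the patched signature (the URLs are placeholders):

# Decoded text plus the Content-Type header:
data, content_type = read_url("https://example.com/files/")

# Parsed JSON from an endpoint that expects an Accept header:
releases, _ = read_url("https://api.github.com/repos/lxml/libxml2-win-binaries/releases",
                       accept="application/vnd.github+json", as_json=True)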