Resolve build failures on AppVeyor/Windows caused by incomplete (abbreviated) download lists of GitHub releases.

Closes https://bugs.launchpad.net/lxml/+bug/1993962
author: Stefan Behnel <stefan_ml@behnel.de> 2022-10-24 17:19:14 +0200
committer: Stefan Behnel <stefan_ml@behnel.de> 2022-10-24 17:19:26 +0200
commit: 77413c28e237f3f4bf2363150aaab9d2f7705f7b (patch)
tree: 550d2f38f81ea48cb276a1403971ca84c976f58e /buildlibxml.py
parent: 0b10088e4a358d5153defee68c1f7dfd2c06f18b (diff)
download: python-lxml-77413c28e237f3f4bf2363150aaab9d2f7705f7b.tar.gz
1 file changed, 36 insertions, 26 deletions
diff --git a/buildlibxml.py b/buildlibxml.py
index 5a7a4c64..8a7504eb 100644
--- a/buildlibxml.py
+++ b/buildlibxml.py
@@ -1,3 +1,4 @@
+import json
 import os, re, sys, subprocess, platform
 import tarfile
 from distutils import log
@@ -5,11 +6,12 @@ from contextlib import closing, contextmanager
 from ftplib import FTP
 
 try:
-    from urlparse import urljoin, unquote, urlparse
-    from urllib import urlretrieve, urlopen, urlcleanup
-except ImportError:
     from urllib.parse import urljoin, unquote, urlparse
-    from urllib.request import urlretrieve, urlopen, urlcleanup
+    from urllib.request import urlretrieve, urlopen, urlcleanup, Request
+except ImportError:  # Py2
+    from urlparse import urljoin, unquote, urlparse
+    from urllib import urlretrieve, urlcleanup
+    from urllib2 import urlopen, Request
 
 multi_make_options = []
 try:
@@ -30,17 +32,16 @@ sys_platform = sys.platform
 # use pre-built libraries on Windows
 
 def download_and_extract_windows_binaries(destdir):
-    url = "https://github.com/lxml/libxml2-win-binaries/releases"
-    filenames = list(_list_dir_urllib(url))
-
-    release_path = "/download/%s/" % find_max_version(
-        "library release", filenames, re.compile(r"/releases/tag/([0-9.]+[0-9])$"))
-    url += release_path
-    filenames = [
-        filename.rsplit('/', 1)[1]
-        for filename in filenames
-        if release_path in filename
-    ]
+    url = "https://api.github.com/repos/lxml/libxml2-win-binaries/releases"
+    releases, _ = read_url(url, accept="application/vnd.github+json", as_json=True)
+
+    max_release = {'tag_name': ''}
+    for release in releases:
+        if max_release['tag_name'] < release.get('tag_name', ''):
+            max_release = release
+
+    url = "https://github.com/lxml/libxml2-win-binaries/releases/download/%s/" % max_release['tag_name']
+    filenames = [asset['name'] for asset in max_release.get('assets', ())]
 
     # Check for native ARM64 build or the environment variable that is set by
     # Visual Studio for cross-compilation (same variable as setuptools uses)
@@ -168,13 +169,26 @@ def _list_dir_ftplib(url):
     return parse_text_ftplist("\n".join(data))
 
 
-def _list_dir_urllib(url):
-    with closing(urlopen(url)) as res:
+def read_url(url, decode=True, accept=None, as_json=False):
+    if accept:
+        request = Request(url, headers={'Accept': accept})
+    else:
+        request = Request(url)
+
+    with closing(urlopen(request)) as res:
         charset = _find_content_encoding(res)
         content_type = res.headers.get('Content-Type')
         data = res.read()
 
-    data = data.decode(charset)
+    if decode:
+        data = data.decode(charset)
+    if as_json:
+        data = json.loads(data)
+    return data, content_type
+
+
+def _list_dir_urllib(url):
+    data, content_type = read_url(url)
     if content_type and content_type.startswith('text/html'):
         files = parse_html_filelist(data)
     else:
@@ -183,13 +197,11 @@ def _list_dir_urllib(url):
 
 
 def http_find_latest_version_directory(url, version=None):
-    with closing(urlopen(url)) as res:
-        charset = _find_content_encoding(res)
-        data = res.read()
+    data, _ = read_url(url)
     # e.g. <a href="1.0/">
     directories = [
         (int(v[0]), int(v[1]))
-        for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data.decode(charset))
+        for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data)
     ]
     if not directories:
         return url
@@ -204,10 +216,8 @@ def http_find_latest_version_directory(url, version=None):
 
 
 def http_listfiles(url, re_pattern):
-    with closing(urlopen(url)) as res:
-        charset = _find_content_encoding(res)
-        data = res.read()
-    files = re.findall(re_pattern, data.decode(charset))
+    data, _ = read_url(url)
+    files = re.findall(re_pattern, data)
     return files
author	Stefan Behnel <stefan_ml@behnel.de>	2022-10-24 17:19:14 +0200
committer	Stefan Behnel <stefan_ml@behnel.de>	2022-10-24 17:19:26 +0200
commit	77413c28e237f3f4bf2363150aaab9d2f7705f7b (patch)
tree	550d2f38f81ea48cb276a1403971ca84c976f58e /buildlibxml.py
parent	0b10088e4a358d5153defee68c1f7dfd2c06f18b (diff)
download	python-lxml-77413c28e237f3f4bf2363150aaab9d2f7705f7b.tar.gz