author     Stéphane Bidoul <stephane.bidoul@gmail.com>   2023-04-10 11:23:24 +0200
committer  GitHub <noreply@github.com>                   2023-04-10 11:23:24 +0200
commit     42b19abb69cbaee09c08a1d9c316553319677e20 (patch)
tree       1b5b0b666c87efffa5ef73858f0b42497ed80777
parent     aebc0c5fc321141ede837c80572427ab7b795c3f (diff)
parent     89e7208784905d7db6b3bfe75f8be00cc8f65895 (diff)
Merge pull request #11936 from sbidoul/fix-link-hash-parsing
Various fixes to the link hash parser
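
For context on the fix: a URL can carry a hash in its fragment (e.g. "#sha256=<hexdigest>"),
possibly alongside other fragment parameters such as "subdirectory=". The old pattern
"({choices})=(.*)" matched anywhere in the URL and captured everything after the "=", so a
trailing "&subdirectory=setup" ended up inside the digest, and a stray prefix such as
"xsha256=" was still accepted. The new pattern anchors on "#" or "&" and stops the digest at
the next "&". A standalone sketch of the idea (the hash-name list below is an illustrative
subset, not pip's actual _SUPPORTED_HASHES):

    import re

    # Illustrative subset of algorithm names; pip builds the real alternation
    # from its _SUPPORTED_HASHES tuple.
    choices = "|".join(map(re.escape, ["sha256", "sha384", "sha512", "md5"]))
    fragment_hash_re = re.compile(rf"[#&]({choices})=([^&]*)")

    url = "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe&subdirectory=setup"
    match = fragment_hash_re.search(url)
    print(match.groups())  # ('sha256', 'aa113592bbe') -- "&subdirectory=..." is excluded

    # "xsha256" is not anchored on "#" or "&", so it no longer matches at all.
    print(fragment_hash_re.search("https://pypi.org/pip-18.0.tar.gz#xsha256=aa113592bbe"))  # None
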
-rw-r--r--  news/11936.bugfix.rst              1
-rw-r--r--  src/pip/_internal/models/link.py  35
-rw-r--r--  tests/unit/test_collector.py      34
3 files changed, 55 insertions, 15 deletions
diff --git a/news/11936.bugfix.rst b/news/11936.bugfix.rst
new file mode 100644
index 000000000..4ae3ad69a
--- /dev/null
+++ b/news/11936.bugfix.rst
@@ -0,0 +1 @@
+Fix and improve the parsing of hashes embedded in URL fragments.
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index a1e4d5a08..e741c3283 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -55,25 +55,37 @@ class LinkHash:
     name: str
     value: str
 
-    _hash_re = re.compile(
+    _hash_url_fragment_re = re.compile(
         # NB: we do not validate that the second group (.*) is a valid hex
         # digest. Instead, we simply keep that string in this class, and then check it
         # against Hashes when hash-checking is needed. This is easier to debug than
         # proactively discarding an invalid hex digest, as we handle incorrect hashes
         # and malformed hashes in the same place.
-        r"({choices})=(.*)".format(
+        r"[#&]({choices})=([^&]*)".format(
             choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
         ),
     )
 
     def __post_init__(self) -> None:
-        assert self._hash_re.match(f"{self.name}={self.value}")
+        assert self.name in _SUPPORTED_HASHES
+
+    @classmethod
+    def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
+        """Parse a PEP 658 data-dist-info-metadata hash."""
+        if dist_info_metadata == "true":
+            return None
+        name, sep, value = dist_info_metadata.partition("=")
+        if not sep:
+            return None
+        if name not in _SUPPORTED_HASHES:
+            return None
+        return cls(name=name, value=value)
 
     @classmethod
     @functools.lru_cache(maxsize=None)
-    def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
+    def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
         """Search a string for a checksum algorithm name and encoded output value."""
-        match = cls._hash_re.search(url)
+        match = cls._hash_url_fragment_re.search(url)
         if match is None:
             return None
         name, value = match.groups()
@@ -217,7 +229,7 @@ class Link(KeyBasedCompareMixin):
         # trying to set a new value.
         self._url = url
 
-        link_hash = LinkHash.split_hash_name_and_value(url)
+        link_hash = LinkHash.find_hash_url_fragment(url)
         hashes_from_link = {} if link_hash is None else link_hash.as_dict()
         if hashes is None:
             self._hashes = hashes_from_link
@@ -402,15 +414,10 @@ class Link(KeyBasedCompareMixin):
         if self.dist_info_metadata is None:
             return None
         metadata_url = f"{self.url_without_fragment}.metadata"
-        # If data-dist-info-metadata="true" is set, then the metadata file exists,
-        # but there is no information about its checksum or anything else.
-        if self.dist_info_metadata != "true":
-            link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
-        else:
-            link_hash = None
-        if link_hash is None:
+        metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
+        if metadata_link_hash is None:
             return Link(metadata_url)
-        return Link(metadata_url, hashes=link_hash.as_dict())
+        return Link(metadata_url, hashes=metadata_link_hash.as_dict())
 
     def as_hashes(self) -> Hashes:
         return Hashes({k: [v] for k, v in self._hashes.items()})
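
As a rough usage sketch of the two parsing entry points this change leaves in place (LinkHash
lives under pip._internal, so this is for illustration only, not a supported public API):

    from pip._internal.models.link import LinkHash

    # Fragment parsing: the hash is found even when other fragment parameters
    # precede or follow it, and the digest stops at the next "&".
    link_hash = LinkHash.find_hash_url_fragment(
        "https://pypi.org/pip-18.0.tar.gz#subdirectory=setup&sha256=aa113592bbe"
    )
    print(link_hash)  # prints something like LinkHash(name='sha256', value='aa113592bbe')

    # PEP 658 parsing: "true" means the metadata file exists but carries no
    # hash, and unsupported algorithm names are ignored.
    print(LinkHash.parse_pep658_hash("sha256=aa113592bbe"))  # LinkHash for sha256
    print(LinkHash.parse_pep658_hash("true"))                # None
    print(LinkHash.parse_pep658_hash("sha500=aa113592bbe"))  # None (unsupported algorithm)
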
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index 26a2ce4b9..e855d78e1 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -1052,6 +1052,21 @@ def test_link_collector_create_find_links_expansion(
LinkHash("sha256", "aa113592bbe"),
),
(
+ "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe&subdirectory=setup",
+ LinkHash("sha256", "aa113592bbe"),
+ ),
+ (
+ "https://pypi.org/pip-18.0.tar.gz#subdirectory=setup&sha256=aa113592bbe",
+ LinkHash("sha256", "aa113592bbe"),
+ ),
+ # "xsha256" is not a valid algorithm, so we discard it.
+ ("https://pypi.org/pip-18.0.tar.gz#xsha256=aa113592bbe", None),
+ # Empty hash.
+ (
+ "https://pypi.org/pip-18.0.tar.gz#sha256=",
+ LinkHash("sha256", ""),
+ ),
+ (
"https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
LinkHash("md5", "aa113592bbe"),
),
@@ -1061,4 +1076,21 @@ def test_link_collector_create_find_links_expansion(
     ],
 )
 def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
-    assert LinkHash.split_hash_name_and_value(url) == result
+    assert LinkHash.find_hash_url_fragment(url) == result
+
+
+@pytest.mark.parametrize(
+    "dist_info_metadata, result",
+    [
+        ("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
+        ("sha256=", LinkHash("sha256", "")),
+        ("sha500=aa113592bbe", None),
+        ("true", None),
+        ("", None),
+        ("aa113592bbe", None),
+    ],
+)
+def test_pep658_hash_parsing(
+    dist_info_metadata: str, result: Optional[LinkHash]
+) -> None:
+    assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
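
To exercise just the parsing tests touched here from a pip source checkout (assuming the test
dependencies are installed), something along these lines should work:

    import pytest

    # Select only the two parametrized tests touched by this change.
    pytest.main([
        "tests/unit/test_collector.py",
        "-k", "link_hash_parsing or pep658_hash_parsing",
    ])
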