diff options
author | Stéphane Bidoul <stephane.bidoul@gmail.com> | 2023-04-10 11:23:24 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-10 11:23:24 +0200 |
commit | 42b19abb69cbaee09c08a1d9c316553319677e20 (patch) | |
tree | 1b5b0b666c87efffa5ef73858f0b42497ed80777 | |
parent | aebc0c5fc321141ede837c80572427ab7b795c3f (diff) | |
parent | 89e7208784905d7db6b3bfe75f8be00cc8f65895 (diff) | |
download | pip-42b19abb69cbaee09c08a1d9c316553319677e20.tar.gz |
Merge pull request #11936 from sbidoul/fix-link-hash-parsing
Various fixes to the link hash parser
-rw-r--r-- | news/11936.bugfix.rst | 1 | ||||
-rw-r--r-- | src/pip/_internal/models/link.py | 35 | ||||
-rw-r--r-- | tests/unit/test_collector.py | 34 |
3 files changed, 55 insertions, 15 deletions
```diff
diff --git a/news/11936.bugfix.rst b/news/11936.bugfix.rst
new file mode 100644
index 000000000..4ae3ad69a
--- /dev/null
+++ b/news/11936.bugfix.rst
@@ -0,0 +1 @@
+Fix and improve the parsing of hashes embedded in URL fragments.
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index a1e4d5a08..e741c3283 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -55,25 +55,37 @@ class LinkHash:
     name: str
     value: str
 
-    _hash_re = re.compile(
+    _hash_url_fragment_re = re.compile(
         # NB: we do not validate that the second group (.*) is a valid hex
         # digest. Instead, we simply keep that string in this class, and then check it
         # against Hashes when hash-checking is needed. This is easier to debug than
         # proactively discarding an invalid hex digest, as we handle incorrect hashes
         # and malformed hashes in the same place.
-        r"({choices})=(.*)".format(
+        r"[#&]({choices})=([^&]*)".format(
             choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
         ),
     )
 
     def __post_init__(self) -> None:
-        assert self._hash_re.match(f"{self.name}={self.value}")
+        assert self.name in _SUPPORTED_HASHES
+
+    @classmethod
+    def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
+        """Parse a PEP 658 data-dist-info-metadata hash."""
+        if dist_info_metadata == "true":
+            return None
+        name, sep, value = dist_info_metadata.partition("=")
+        if not sep:
+            return None
+        if name not in _SUPPORTED_HASHES:
+            return None
+        return cls(name=name, value=value)
 
     @classmethod
     @functools.lru_cache(maxsize=None)
-    def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
+    def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
         """Search a string for a checksum algorithm name and encoded output value."""
-        match = cls._hash_re.search(url)
+        match = cls._hash_url_fragment_re.search(url)
         if match is None:
             return None
         name, value = match.groups()
@@ -217,7 +229,7 @@ class Link(KeyBasedCompareMixin):
         # trying to set a new value.
         self._url = url
 
-        link_hash = LinkHash.split_hash_name_and_value(url)
+        link_hash = LinkHash.find_hash_url_fragment(url)
         hashes_from_link = {} if link_hash is None else link_hash.as_dict()
         if hashes is None:
             self._hashes = hashes_from_link
@@ -402,15 +414,10 @@ class Link(KeyBasedCompareMixin):
         if self.dist_info_metadata is None:
             return None
         metadata_url = f"{self.url_without_fragment}.metadata"
-        # If data-dist-info-metadata="true" is set, then the metadata file exists,
-        # but there is no information about its checksum or anything else.
-        if self.dist_info_metadata != "true":
-            link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
-        else:
-            link_hash = None
-        if link_hash is None:
+        metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
+        if metadata_link_hash is None:
             return Link(metadata_url)
-        return Link(metadata_url, hashes=link_hash.as_dict())
+        return Link(metadata_url, hashes=metadata_link_hash.as_dict())
 
     def as_hashes(self) -> Hashes:
         return Hashes({k: [v] for k, v in self._hashes.items()})
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index 26a2ce4b9..e855d78e1 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -1052,6 +1052,21 @@ def test_link_collector_create_find_links_expansion(
             LinkHash("sha256", "aa113592bbe"),
         ),
         (
+            "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe&subdirectory=setup",
+            LinkHash("sha256", "aa113592bbe"),
+        ),
+        (
+            "https://pypi.org/pip-18.0.tar.gz#subdirectory=setup&sha256=aa113592bbe",
+            LinkHash("sha256", "aa113592bbe"),
+        ),
+        # "xsha256" is not a valid algorithm, so we discard it.
+        ("https://pypi.org/pip-18.0.tar.gz#xsha256=aa113592bbe", None),
+        # Empty hash.
+        (
+            "https://pypi.org/pip-18.0.tar.gz#sha256=",
+            LinkHash("sha256", ""),
+        ),
+        (
             "https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
             LinkHash("md5", "aa113592bbe"),
         ),
@@ -1061,4 +1076,21 @@ def test_link_collector_create_find_links_expansion(
     ],
 )
 def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
-    assert LinkHash.split_hash_name_and_value(url) == result
+    assert LinkHash.find_hash_url_fragment(url) == result
+
+
+@pytest.mark.parametrize(
+    "dist_info_metadata, result",
+    [
+        ("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
+        ("sha256=", LinkHash("sha256", "")),
+        ("sha500=aa113592bbe", None),
+        ("true", None),
+        ("", None),
+        ("aa113592bbe", None),
+    ],
+)
+def test_pep658_hash_parsing(
+    dist_info_metadata: str, result: Optional[LinkHash]
+) -> None:
+    assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
```