Diffstat (limited to 'Lib/packaging/tests/test_pypi_simple.py')
-rw-r--r-- | Lib/packaging/tests/test_pypi_simple.py | 353
1 files changed, 353 insertions, 0 deletions
diff --git a/Lib/packaging/tests/test_pypi_simple.py b/Lib/packaging/tests/test_pypi_simple.py
new file mode 100644
index 0000000000..59204c4b8d
--- /dev/null
+++ b/Lib/packaging/tests/test_pypi_simple.py
@@ -0,0 +1,353 @@
+"""Tests for the packaging.pypi.simple module."""
+import re
+import os
+import sys
+import http.client
+import urllib.error
+import urllib.parse
+import urllib.request
+
+from packaging.pypi.simple import Crawler
+
+from packaging.tests import unittest
+from packaging.tests.support import (TempdirManager, LoggingCatcher,
+                                     fake_dec)
+
+try:
+    import _thread
+    from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
+                                             PYPI_DEFAULT_STATIC_PATH)
+except ImportError:
+    _thread = None
+    use_pypi_server = fake_dec
+    PYPI_DEFAULT_STATIC_PATH = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
+
+
+class SimpleCrawlerTestCase(TempdirManager,
+                            LoggingCatcher,
+                            unittest.TestCase):
+
+    def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
+                            *args, **kwargs):
+        """Build and return a SimpleIndex with the test server urls"""
+        if hosts is None:
+            hosts = (server.full_address.replace("http://", ""),)
+        kwargs['hosts'] = hosts
+        return Crawler(server.full_address + base_url, *args,
+                       **kwargs)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server()
+    def test_bad_urls(self, server):
+        crawler = Crawler()
+        url = 'http://127.0.0.1:0/nonesuch/test_simple'
+        try:
+            v = crawler._open_url(url)
+        except Exception as v:
+            self.assertIn(url, str(v))
+        else:
+            v.close()
+            self.assertIsInstance(v, urllib.error.HTTPError)
+
+        # issue 16
+        # easy_install inquant.contentmirror.plone breaks because of a typo
+        # in its home URL
+        crawler = Crawler(hosts=('example.org',))
+        url = ('url:%20https://svn.plone.org/svn/collective/'
+               'inquant.contentmirror.plone/trunk')
+        try:
+            v = crawler._open_url(url)
+        except Exception as v:
+            self.assertIn(url, str(v))
+        else:
+            v.close()
+            self.assertIsInstance(v, urllib.error.HTTPError)
+
+        def _urlopen(*args):
+            raise http.client.BadStatusLine('line')
+
+        old_urlopen = urllib.request.urlopen
+        urllib.request.urlopen = _urlopen
+        url = 'http://example.org'
+        try:
+            v = crawler._open_url(url)
+        except Exception as v:
+            self.assertIn('line', str(v))
+        else:
+            v.close()
+            # TODO use self.assertRaises
+            raise AssertionError('Should have raise here!')
+        finally:
+            urllib.request.urlopen = old_urlopen
+
+        # issue 20
+        url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
+        try:
+            crawler._open_url(url)
+        except Exception as v:
+            self.assertIn('Download error', str(v))
+
+        # issue #160
+        url = server.full_address
+        page = ('<a href="http://www.famfamfam.com]('
+                'http://www.famfamfam.com/">')
+        crawler._process_url(url, page)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("test_found_links")
+    def test_found_links(self, server):
+        # Browse the index, asking for a specified release version
+        # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
+        crawler = self._get_simple_crawler(server)
+        last_release = crawler.get_release("foobar")
+
+        # we have scanned the index page
+        self.assertIn(server.full_address + "/simple/foobar/",
+                      crawler._processed_urls)
+
+        # we have found 4 releases in this page
+        self.assertEqual(len(crawler._projects["foobar"]), 4)
+
+        # and returned the most recent one
+        self.assertEqual("%s" % last_release.version, '2.0.1')
+
+    def test_is_browsable(self):
+        crawler = Crawler(follow_externals=False)
+        self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
+
+        # Now, when following externals, we can have a list of hosts to trust
+        # and not follow external links other than the ones described here.
+        crawler = Crawler(hosts=["pypi.python.org", "example.org"],
+                          follow_externals=True)
+        good_urls = (
+            "http://pypi.python.org/foo/bar",
+            "http://pypi.python.org/simple/foobar",
+            "http://example.org",
+            "http://example.org/",
+            "http://example.org/simple/",
+        )
+        bad_urls = (
+            "http://python.org",
+            "http://example.tld",
+        )
+
+        for url in good_urls:
+            self.assertTrue(crawler._is_browsable(url))
+
+        for url in bad_urls:
+            self.assertFalse(crawler._is_browsable(url))
+
+        # allow all hosts
+        crawler = Crawler(follow_externals=True, hosts=("*",))
+        self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
+        self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
+
+        # specify a list of hosts we want to allow
+        crawler = Crawler(follow_externals=True,
+                          hosts=("*.example.org",))
+        self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
+        self.assertTrue(
+            crawler._is_browsable("http://pypi.example.org/a/path"))
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("with_externals")
+    def test_follow_externals(self, server):
+        # Include external pages
+        # Try to request the package index, which contains links to "external"
+        # resources. They have to be scanned too.
+        crawler = self._get_simple_crawler(server, follow_externals=True)
+        crawler.get_release("foobar")
+        self.assertIn(server.full_address + "/external/external.html",
+                      crawler._processed_urls)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("with_real_externals")
+    def test_restrict_hosts(self, server):
+        # Using only a list of allowed hosts is possible.
+        # Test that telling the simple PyPI client not to retrieve externals
+        # works.
+        crawler = self._get_simple_crawler(server, follow_externals=False)
+        crawler.get_release("foobar")
+        self.assertNotIn(server.full_address + "/external/external.html",
+                         crawler._processed_urls)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server(static_filesystem_paths=["with_externals"],
+                     static_uri_paths=["simple", "external"])
+    def test_links_priority(self, server):
+        # Download links from the pypi simple index should be used before
+        # external download links.
+        # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
+        #
+        # Use case:
+        # - someone uploads a package on pypi, an md5 is generated
+        # - someone manually copies this link (with the md5 in the url) onto
+        #   an external page accessible from the package page.
+        # - someone reuploads the package (with a different md5)
+        # - while easy_installing, an MD5 error occurs because the external
+        #   link is used
+        # -> The index should use the link from pypi, not the external one.
+
+        # start an index server
+        index_url = server.full_address + '/simple/'
+
+        # scan a test index
+        crawler = Crawler(index_url, follow_externals=True)
+        releases = crawler.get_releases("foobar")
+        server.stop()
+
+        # we have only one link, because links are compared without md5
+        self.assertEqual(1, len(releases))
+        self.assertEqual(1, len(releases[0].dists))
+        # the link should be from the index
+        self.assertEqual(2, len(releases[0].dists['sdist'].urls))
+        self.assertEqual('12345678901234567',
+                         releases[0].dists['sdist'].url['hashval'])
+        self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server(static_filesystem_paths=["with_norel_links"],
+                     static_uri_paths=["simple", "external"])
+    def test_not_scan_all_links(self, server):
+        # Do not follow all index page links.
+        # The links not tagged with rel="download" or rel="homepage" must
+        # not be processed by the package index while processing "pages".
+
+        # process the pages
+        crawler = self._get_simple_crawler(server, follow_externals=True)
+        crawler.get_releases("foobar")
+        # now it should have processed only pages with links rel="download"
+        # and rel="homepage"
+        self.assertIn("%s/simple/foobar/" % server.full_address,
+                      crawler._processed_urls)  # it's the simple index page
+        self.assertIn("%s/external/homepage.html" % server.full_address,
+                      crawler._processed_urls)  # the external homepage is rel="homepage"
+        self.assertNotIn("%s/external/nonrel.html" % server.full_address,
+                         crawler._processed_urls)  # this link contains no rel=*
+        self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
+                         crawler._processed_urls)  # linked from simple index (no rel)
+        self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
+                      crawler._processed_urls)  # linked from simple index (rel)
+        self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
+                      crawler._processed_urls)  # linked from external homepage (rel)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    def test_uses_mirrors(self):
+        # When the main repository seems down, try using the given mirrors
+        server = PyPIServer("foo_bar_baz")
+        mirror = PyPIServer("foo_bar_baz")
+        mirror.start()  # we don't start the main server here
+
+        try:
+            # create the index using both servers
+            crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
+                              # set the timeout to 1s for the tests
+                              timeout=1, mirrors=[mirror.full_address])
+
+            # this should not raise a timeout
+            self.assertEqual(4, len(crawler.get_releases("foo")))
+        finally:
+            mirror.stop()
+            server.stop()
+
+    def test_simple_link_matcher(self):
+        # Test that the simple link matcher finds the right links
+        crawler = Crawler(follow_externals=False)
+
+        # Here, we define:
+        #   1. one link that must be followed, because it's a download one
+        #   2. one link that must *not* be followed, because is_browsable
+        #      returns False for it
+        #   3. one link that must be followed because it's a homepage that is
+        #      browsable
+        #   4. one link that must be followed, because it contains an md5 hash
+        self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
+        self.assertFalse(crawler._is_browsable("http://dl-link2"))
+        content = """
+        <a href="http://dl-link1" rel="download">download_link1</a>
+        <a href="http://dl-link2" rel="homepage">homepage_link1</a>
+        <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
+        <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
+        """ % {'index_url': crawler.index_url}
+
+        # Test that the simple link matcher yields the right links.
+        generator = crawler._simple_link_matcher(content, crawler.index_url)
+        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
+                          crawler.index_url, True), next(generator))
+        self.assertEqual(('http://dl-link1', True), next(generator))
+        self.assertEqual(('%stest' % crawler.index_url, False),
+                         next(generator))
+        self.assertRaises(StopIteration, generator.__next__)
+
+        # Following external links is possible (e.g. homepages)
+        crawler.follow_externals = True
+        generator = crawler._simple_link_matcher(content, crawler.index_url)
+        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
+                          crawler.index_url, True), next(generator))
+        self.assertEqual(('http://dl-link1', True), next(generator))
+        self.assertEqual(('http://dl-link2', False), next(generator))
+        self.assertEqual(('%stest' % crawler.index_url, False),
+                         next(generator))
+        self.assertRaises(StopIteration, generator.__next__)
+
+    def test_browse_local_files(self):
+        # Test that we can browse local files
+        index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
+        if sys.platform == 'win32':
+            # under windows the correct syntax is:
+            #   file:///C|\the\path\here
+            # instead of
+            #   file://C:\the\path\here
+            fix = re.compile(r'^(file://)([A-Za-z])(:)')
+            index_url = fix.sub('\\1/\\2|', index_url)
+
+        index_path = os.sep.join([index_url, "test_found_links", "simple"])
+        crawler = Crawler(index_path)
+        dists = crawler.get_releases("foobar")
+        self.assertEqual(4, len(dists))
+
+    def test_get_link_matcher(self):
+        crawler = Crawler("http://example.org")
+        self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
+            "http://example.org/some/file").__name__)
+        self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
+            "http://other-url").__name__)
+
+    def test_default_link_matcher(self):
+        crawler = Crawler("http://example.org", mirrors=[])
+        crawler.follow_externals = True
+        crawler._is_browsable = lambda *args: True
+        base_url = "http://example.org/some/file/"
+        content = """
+<a href="../homepage" rel="homepage">link</a>
+<a href="../download" rel="download">link2</a>
+<a href="../simpleurl">link2</a>
+        """
+        found_links = set(uri for uri, _ in
+                          crawler._default_link_matcher(content, base_url))
+        self.assertIn('http://example.org/some/homepage', found_links)
+        self.assertIn('http://example.org/some/simpleurl', found_links)
+        self.assertIn('http://example.org/some/download', found_links)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("project_list")
+    def test_search_projects(self, server):
+        # we can search the index for some projects by name;
+        # the case used does not matter here
+        crawler = self._get_simple_crawler(server)
+        tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
+                 ('foobar*', ['FooBar-bar', 'Foobar-baz']),
+                 ('*foobar', ['Baz-FooBar']))
+
+        for search, expected in tests:
+            projects = [p.name for p in crawler.search_projects(search)]
+            self.assertListEqual(expected, projects)
+
+
+def test_suite():
+    return unittest.makeSuite(SimpleCrawlerTestCase)
+
+if __name__ == '__main__':
+    unittest.main(defaultTest="test_suite")