Diffstat (limited to 'Lib/packaging/tests/test_pypi_simple.py')
-rw-r--r--  Lib/packaging/tests/test_pypi_simple.py  353
1 file changed, 353 insertions, 0 deletions
diff --git a/Lib/packaging/tests/test_pypi_simple.py b/Lib/packaging/tests/test_pypi_simple.py
new file mode 100644
index 0000000000..59204c4b8d
--- /dev/null
+++ b/Lib/packaging/tests/test_pypi_simple.py
@@ -0,0 +1,353 @@
+"""Tests for the packaging.pypi.simple module."""
+import re
+import os
+import sys
+import http.client
+import urllib.error
+import urllib.parse
+import urllib.request
+
+from packaging.pypi.simple import Crawler
+
+from packaging.tests import unittest
+from packaging.tests.support import (TempdirManager, LoggingCatcher,
+ fake_dec)
+
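+# The test PyPI server needs threads: when _thread is unavailable the
+# decorator becomes a no-op and the threaded tests below are skipped.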
+try:
+ import _thread
+ from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
+ PYPI_DEFAULT_STATIC_PATH)
+except ImportError:
+ _thread = None
+ use_pypi_server = fake_dec
+ PYPI_DEFAULT_STATIC_PATH = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
+
+
+class SimpleCrawlerTestCase(TempdirManager,
+ LoggingCatcher,
+ unittest.TestCase):
+
+ def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
+ *args, **kwargs):
+ """Build and return a SimpleIndex with the test server urls"""
+ if hosts is None:
+ hosts = (server.full_address.replace("http://", ""),)
+ kwargs['hosts'] = hosts
+ return Crawler(server.full_address + base_url, *args,
+ **kwargs)
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server()
+ def test_bad_urls(self, server):
+ crawler = Crawler()
+ url = 'http://127.0.0.1:0/nonesuch/test_simple'
+ try:
+ v = crawler._open_url(url)
+ except Exception as v:
+ self.assertIn(url, str(v))
+ else:
+ v.close()
+ self.assertIsInstance(v, urllib.error.HTTPError)
+
+ # issue 16
+ # easy_install inquant.contentmirror.plone breaks because of a typo
+ # in its home URL
+ crawler = Crawler(hosts=('example.org',))
+ url = ('url:%20https://svn.plone.org/svn/collective/'
+ 'inquant.contentmirror.plone/trunk')
+ try:
+ v = crawler._open_url(url)
+ except Exception as v:
+ self.assertIn(url, str(v))
+ else:
+ v.close()
+ self.assertIsInstance(v, urllib.error.HTTPError)
+
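+ # Stub out urllib.request.urlopen so it raises BadStatusLine, to check
+ # that the message from the underlying exception is surfaced.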
+ def _urlopen(*args):
+ raise http.client.BadStatusLine('line')
+
+ old_urlopen = urllib.request.urlopen
+ urllib.request.urlopen = _urlopen
+ url = 'http://example.org'
+ try:
+ v = crawler._open_url(url)
+ except Exception as v:
+ self.assertIn('line', str(v))
+ else:
+ v.close()
+ # TODO use self.assertRaises
+ raise AssertionError('Should have raised here!')
+ finally:
+ urllib.request.urlopen = old_urlopen
+
+ # issue 20
+ url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
+ try:
+ crawler._open_url(url)
+ except Exception as v:
+ self.assertIn('Download error', str(v))
+
+ # issue #160
+ url = server.full_address
+ page = ('<a href="http://www.famfamfam.com]('
+ 'http://www.famfamfam.com/">')
+ crawler._process_url(url, page)
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server("test_found_links")
+ def test_found_links(self, server):
+ # Browse the index, asking for a specified release version
+ # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
+ crawler = self._get_simple_crawler(server)
+ last_release = crawler.get_release("foobar")
+
+ # we have scanned the index page
+ self.assertIn(server.full_address + "/simple/foobar/",
+ crawler._processed_urls)
+
+ # we have found 4 releases in this page
+ self.assertEqual(len(crawler._projects["foobar"]), 4)
+
+ # and returned the most recent one
+ self.assertEqual("%s" % last_release.version, '2.0.1')
+
+ def test_is_browsable(self):
+ crawler = Crawler(follow_externals=False)
+ self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
+
+ # Now, when following externals, we can have a list of hosts to trust
+ # and not follow external links to any other hosts.
+ crawler = Crawler(hosts=["pypi.python.org", "example.org"],
+ follow_externals=True)
+ good_urls = (
+ "http://pypi.python.org/foo/bar",
+ "http://pypi.python.org/simple/foobar",
+ "http://example.org",
+ "http://example.org/",
+ "http://example.org/simple/",
+ )
+ bad_urls = (
+ "http://python.org",
+ "http://example.tld",
+ )
+
+ for url in good_urls:
+ self.assertTrue(crawler._is_browsable(url))
+
+ for url in bad_urls:
+ self.assertFalse(crawler._is_browsable(url))
+
+ # allow all hosts
+ crawler = Crawler(follow_externals=True, hosts=("*",))
+ self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
+ self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
+
+ # specify a list of hosts we want to allow
+ crawler = Crawler(follow_externals=True,
+ hosts=("*.example.org",))
+ self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
+ self.assertTrue(
+ crawler._is_browsable("http://pypi.example.org/a/path"))
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server("with_externals")
+ def test_follow_externals(self, server):
+ # Include external pages
+ # Try to request the package index, which contains links to "external"
+ # resources. They have to be scanned too.
+ crawler = self._get_simple_crawler(server, follow_externals=True)
+ crawler.get_release("foobar")
+ self.assertIn(server.full_address + "/external/external.html",
+ crawler._processed_urls)
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server("with_real_externals")
+ def test_restrict_hosts(self, server):
+ # Only using a list of allowed hosts is possible.
+ # Test that telling the simple PyPI client not to retrieve external
+ # resources works.
+ crawler = self._get_simple_crawler(server, follow_externals=False)
+ crawler.get_release("foobar")
+ self.assertNotIn(server.full_address + "/external/external.html",
+ crawler._processed_urls)
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server(static_filesystem_paths=["with_externals"],
+ static_uri_paths=["simple", "external"])
+ def test_links_priority(self, server):
+ # Download links from the pypi simple index should be used before
+ # external download links.
+ # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
+ #
+ # Use case:
+ # - someone uploads a package on pypi, an md5 is generated
+ # - someone manually copies this link (with the md5 in the url) onto
+ # an external page accessible from the package page.
+ # - someone reuploads the package (with a different md5)
+ # - while easy_installing, an MD5 error occurs because the external
+ # link is used
+ # -> The index should use the link from pypi, not the external one.
+
+ # start an index server
+ index_url = server.full_address + '/simple/'
+
+ # scan a test index
+ crawler = Crawler(index_url, follow_externals=True)
+ releases = crawler.get_releases("foobar")
+ server.stop()
+
+ # we have only one link, because links are compared without md5
+ self.assertEqual(1, len(releases))
+ self.assertEqual(1, len(releases[0].dists))
+ # the link should be from the index
+ self.assertEqual(2, len(releases[0].dists['sdist'].urls))
+ self.assertEqual('12345678901234567',
+ releases[0].dists['sdist'].url['hashval'])
+ self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server(static_filesystem_paths=["with_norel_links"],
+ static_uri_paths=["simple", "external"])
+ def test_not_scan_all_links(self, server):
+ # Do not follow all index page links.
+ # Links not tagged with rel="download" or rel="homepage" must not be
+ # processed by the package index while processing "pages".
+
+ # process the pages
+ crawler = self._get_simple_crawler(server, follow_externals=True)
+ crawler.get_releases("foobar")
+ # now it should have processed only pages with links rel="download"
+ # and rel="homepage"
+ self.assertIn("%s/simple/foobar/" % server.full_address,
+ crawler._processed_urls) # it's the simple index page
+ self.assertIn("%s/external/homepage.html" % server.full_address,
+ crawler._processed_urls) # the external homepage is rel="homepage"
+ self.assertNotIn("%s/external/nonrel.html" % server.full_address,
+ crawler._processed_urls) # this link contains no rel=*
+ self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
+ crawler._processed_urls) # linked from simple index (no rel)
+ self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
+ crawler._processed_urls) # linked from simple index (rel)
+ self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
+ crawler._processed_urls) # linked from external homepage (rel)
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ def test_uses_mirrors(self):
+ # When the main repository seems down, try using the given mirrors
+ server = PyPIServer("foo_bar_baz")
+ mirror = PyPIServer("foo_bar_baz")
+ mirror.start() # the main server is deliberately left unstarted (it should appear down)
+
+ try:
+ # create the index using both servers
+ crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
+ # set the timeout to 1s for the tests
+ timeout=1, mirrors=[mirror.full_address])
+
+ # this should not raise a timeout
+ self.assertEqual(4, len(crawler.get_releases("foo")))
+ finally:
+ mirror.stop()
+ server.stop()
+
+ def test_simple_link_matcher(self):
+ # Test that the simple link matcher finds the right links
+ crawler = Crawler(follow_externals=False)
+
+ # Here, we define:
+ # 1. one link that must be followed, because it's a download link
+ # 2. one link that must *not* be followed, because _is_browsable
+ # returns False for it
+ # 3. one link that must be followed, because it's a homepage that is
+ # browsable
+ # 4. one link that must be followed, because it contains an md5 hash
+ self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
+ self.assertFalse(crawler._is_browsable("http://dl-link2"))
+ content = """
+ <a href="http://dl-link1" rel="download">download_link1</a>
+ <a href="http://dl-link2" rel="homepage">homepage_link1</a>
+ <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
+ <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
+ """ % {'index_url': crawler.index_url}
+
+ # Test that the simple link matcher yields the right links.
+ generator = crawler._simple_link_matcher(content, crawler.index_url)
+ self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
+ crawler.index_url, True), next(generator))
+ self.assertEqual(('http://dl-link1', True), next(generator))
+ self.assertEqual(('%stest' % crawler.index_url, False),
+ next(generator))
+ self.assertRaises(StopIteration, generator.__next__)
+
+ # Following external links is possible (e.g. homepages)
+ crawler.follow_externals = True
+ generator = crawler._simple_link_matcher(content, crawler.index_url)
+ self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
+ crawler.index_url, True), next(generator))
+ self.assertEqual(('http://dl-link1', True), next(generator))
+ self.assertEqual(('http://dl-link2', False), next(generator))
+ self.assertEqual(('%stest' % crawler.index_url, False),
+ next(generator))
+ self.assertRaises(StopIteration, generator.__next__)
+
+ def test_browse_local_files(self):
+ # Test that we can browse local files
+ index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
+ if sys.platform == 'win32':
+ # under windows the correct syntax is:
+ # file:///C|\the\path\here
+ # instead of
+ # file://C:\the\path\here
+ fix = re.compile(r'^(file://)([A-Za-z])(:)')
+ index_url = fix.sub('\\1/\\2|', index_url)
+
+ index_path = os.sep.join([index_url, "test_found_links", "simple"])
+ crawler = Crawler(index_path)
+ dists = crawler.get_releases("foobar")
+ self.assertEqual(4, len(dists))
+
+ def test_get_link_matcher(self):
+ crawler = Crawler("http://example.org")
+ self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
+ "http://example.org/some/file").__name__)
+ self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
+ "http://other-url").__name__)
+
+ def test_default_link_matcher(self):
+ crawler = Crawler("http://example.org", mirrors=[])
+ crawler.follow_externals = True
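+ # make every URL count as browsable for this test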
+ crawler._is_browsable = lambda *args: True
+ base_url = "http://example.org/some/file/"
+ content = """
+<a href="../homepage" rel="homepage">link</a>
+<a href="../download" rel="download">link2</a>
+<a href="../simpleurl">link2</a>
+ """
+ found_links = set(uri for uri, _ in
+ crawler._default_link_matcher(content, base_url))
+ self.assertIn('http://example.org/some/homepage', found_links)
+ self.assertIn('http://example.org/some/simpleurl', found_links)
+ self.assertIn('http://example.org/some/download', found_links)
+
+ @unittest.skipIf(_thread is None, 'needs threads')
+ @use_pypi_server("project_list")
+ def test_search_projects(self, server):
+ # we can search the index for projects by name;
+ # the case used does not matter here
+ crawler = self._get_simple_crawler(server)
+ tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
+ ('foobar*', ['FooBar-bar', 'Foobar-baz']),
+ ('*foobar', ['Baz-FooBar']))
+
+ for search, expected in tests:
+ projects = [p.name for p in crawler.search_projects(search)]
+ self.assertListEqual(expected, projects)
+
+
+def test_suite():
+ return unittest.makeSuite(SimpleCrawlerTestCase)
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="test_suite")