Diffstat (limited to 'Lib/packaging/tests/test_pypi_simple.py')
-rw-r--r-- | Lib/packaging/tests/test_pypi_simple.py | 353
1 files changed, 353 insertions, 0 deletions
diff --git a/Lib/packaging/tests/test_pypi_simple.py b/Lib/packaging/tests/test_pypi_simple.py
new file mode 100644
index 0000000000..59204c4b8d
--- /dev/null
+++ b/Lib/packaging/tests/test_pypi_simple.py
@@ -0,0 +1,353 @@
+"""Tests for the packaging.pypi.simple module."""
+import re
+import os
+import sys
+import http.client
+import urllib.error
+import urllib.parse
+import urllib.request
+
+from packaging.pypi.simple import Crawler
+
+from packaging.tests import unittest
+from packaging.tests.support import (TempdirManager, LoggingCatcher,
+                                     fake_dec)
+
+try:
+    import _thread
+    from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
+                                             PYPI_DEFAULT_STATIC_PATH)
+except ImportError:
+    _thread = None
+    use_pypi_server = fake_dec
+    PYPI_DEFAULT_STATIC_PATH = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
+
+
+class SimpleCrawlerTestCase(TempdirManager,
+                            LoggingCatcher,
+                            unittest.TestCase):
+
+    def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
+                            *args, **kwargs):
+        """Build and return a SimpleIndex with the test server urls"""
+        if hosts is None:
+            hosts = (server.full_address.replace("http://", ""),)
+        kwargs['hosts'] = hosts
+        return Crawler(server.full_address + base_url, *args,
+                       **kwargs)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server()
+    def test_bad_urls(self, server):
+        crawler = Crawler()
+        url = 'http://127.0.0.1:0/nonesuch/test_simple'
+        try:
+            v = crawler._open_url(url)
+        except Exception as v:
+            self.assertIn(url, str(v))
+        else:
+            v.close()
+            self.assertIsInstance(v, urllib.error.HTTPError)
+
+        # issue 16
+        # easy_install inquant.contentmirror.plone breaks because of a typo
+        # in its home URL
+        crawler = Crawler(hosts=('example.org',))
+        url = ('url:%20https://svn.plone.org/svn/collective/'
+               'inquant.contentmirror.plone/trunk')
+        try:
+            v = crawler._open_url(url)
+        except Exception as v:
+            self.assertIn(url, str(v))
+        else:
+            v.close()
+            self.assertIsInstance(v, urllib.error.HTTPError)
+
+        def _urlopen(*args):
+            raise http.client.BadStatusLine('line')
+
+        old_urlopen = urllib.request.urlopen
+        urllib.request.urlopen = _urlopen
+        url = 'http://example.org'
+        try:
+            v = crawler._open_url(url)
+        except Exception as v:
+            self.assertIn('line', str(v))
+        else:
+            v.close()
+            # TODO use self.assertRaises
+            raise AssertionError('Should have raise here!')
+        finally:
+            urllib.request.urlopen = old_urlopen
+
+        # issue 20
+        url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
+        try:
+            crawler._open_url(url)
+        except Exception as v:
+            self.assertIn('Download error', str(v))
+
+        # issue #160
+        url = server.full_address
+        page = ('<a href="http://www.famfamfam.com]('
+                'http://www.famfamfam.com/">')
+        crawler._process_url(url, page)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("test_found_links")
+    def test_found_links(self, server):
+        # Browse the index, asking for a specified release version
+        # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
+        crawler = self._get_simple_crawler(server)
+        last_release = crawler.get_release("foobar")
+
+        # we have scanned the index page
+        self.assertIn(server.full_address + "/simple/foobar/",
+                      crawler._processed_urls)
+
+        # we have found 4 releases in this page
+        self.assertEqual(len(crawler._projects["foobar"]), 4)
+
+        # and returned the most recent one
+        self.assertEqual("%s" % last_release.version, '2.0.1')
+
+    def test_is_browsable(self):
+        crawler = Crawler(follow_externals=False)
+        self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
+
+        # Now, when following externals, we can have a list of hosts to trust
+        # and not follow external links other than the ones described here.
+        crawler = Crawler(hosts=["pypi.python.org", "example.org"],
+                          follow_externals=True)
+        good_urls = (
+            "http://pypi.python.org/foo/bar",
+            "http://pypi.python.org/simple/foobar",
+            "http://example.org",
+            "http://example.org/",
+            "http://example.org/simple/",
+        )
+        bad_urls = (
+            "http://python.org",
+            "http://example.tld",
+        )
+
+        for url in good_urls:
+            self.assertTrue(crawler._is_browsable(url))
+
+        for url in bad_urls:
+            self.assertFalse(crawler._is_browsable(url))
+
+        # allow all hosts
+        crawler = Crawler(follow_externals=True, hosts=("*",))
+        self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
+        self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
+
+        # specify a list of hosts we want to allow
+        crawler = Crawler(follow_externals=True,
+                          hosts=("*.example.org",))
+        self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
+        self.assertTrue(
+            crawler._is_browsable("http://pypi.example.org/a/path"))
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("with_externals")
+    def test_follow_externals(self, server):
+        # Include external pages
+        # Try to request the package index, which contains links to "external"
+        # resources. They have to be scanned too.
+        crawler = self._get_simple_crawler(server, follow_externals=True)
+        crawler.get_release("foobar")
+        self.assertIn(server.full_address + "/external/external.html",
+                      crawler._processed_urls)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("with_real_externals")
+    def test_restrict_hosts(self, server):
+        # Using only a list of allowed hosts is possible.
+        # Test that telling the simple PyPI client not to retrieve externals
+        # works.
+        crawler = self._get_simple_crawler(server, follow_externals=False)
+        crawler.get_release("foobar")
+        self.assertNotIn(server.full_address + "/external/external.html",
+                         crawler._processed_urls)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server(static_filesystem_paths=["with_externals"],
+                     static_uri_paths=["simple", "external"])
+    def test_links_priority(self, server):
+        # Download links from the pypi simple index should be used before
+        # external download links.
+        # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
+        #
+        # Use case:
+        # - someone uploads a package on pypi, an md5 is generated
+        # - someone manually copies this link (with the md5 in the url) onto
+        #   an external page accessible from the package page.
+        # - someone reuploads the package (with a different md5)
+        # - while easy_installing, an MD5 error occurs because the external
+        #   link is used
+        # -> The index should use the link from pypi, not the external one.
+
+        # start an index server
+        index_url = server.full_address + '/simple/'
+
+        # scan a test index
+        crawler = Crawler(index_url, follow_externals=True)
+        releases = crawler.get_releases("foobar")
+        server.stop()
+
+        # we have only one link, because links are compared without md5
+        self.assertEqual(1, len(releases))
+        self.assertEqual(1, len(releases[0].dists))
+        # the link should be from the index
+        self.assertEqual(2, len(releases[0].dists['sdist'].urls))
+        self.assertEqual('12345678901234567',
+                         releases[0].dists['sdist'].url['hashval'])
+        self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server(static_filesystem_paths=["with_norel_links"],
+                     static_uri_paths=["simple", "external"])
+    def test_not_scan_all_links(self, server):
+        # Do not follow all index page links.
+        # The links not tagged with rel="download" or rel="homepage" must
+        # not be processed by the package index while processing "pages".
+
+        # process the pages
+        crawler = self._get_simple_crawler(server, follow_externals=True)
+        crawler.get_releases("foobar")
+        # now it should have processed only pages with links rel="download"
+        # and rel="homepage"
+        self.assertIn("%s/simple/foobar/" % server.full_address,
+                      crawler._processed_urls)  # it's the simple index page
+        self.assertIn("%s/external/homepage.html" % server.full_address,
+                      crawler._processed_urls)  # the external homepage is rel="homepage"
+        self.assertNotIn("%s/external/nonrel.html" % server.full_address,
+                         crawler._processed_urls)  # this link contains no rel=*
+        self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
+                         crawler._processed_urls)  # linked from simple index (no rel)
+        self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
+                      crawler._processed_urls)  # linked from simple index (rel)
+        self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
+                      crawler._processed_urls)  # linked from external homepage (rel)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    def test_uses_mirrors(self):
+        # When the main repository seems down, try using the given mirrors
+        server = PyPIServer("foo_bar_baz")
+        mirror = PyPIServer("foo_bar_baz")
+        mirror.start()  # we don't start the main server here
+
+        try:
+            # create the index using both servers
+            crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
+                              # set the timeout to 1s for the tests
+                              timeout=1, mirrors=[mirror.full_address])
+
+            # this should not raise a timeout
+            self.assertEqual(4, len(crawler.get_releases("foo")))
+        finally:
+            mirror.stop()
+            server.stop()
+
+    def test_simple_link_matcher(self):
+        # Test that the simple link matcher finds the right links
+        crawler = Crawler(follow_externals=False)
+
+        # Here, we define:
+        #   1. one link that must be followed, because it's a download one
+        #   2. one link that must *not* be followed, because is_browsable
+        #      returns False for it
+        #   3. one link that must be followed because it's a homepage that is
+        #      browsable
+        #   4. one link that must be followed, because it contains an md5 hash
+        self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
+        self.assertFalse(crawler._is_browsable("http://dl-link2"))
+        content = """
+        <a href="http://dl-link1" rel="download">download_link1</a>
+        <a href="http://dl-link2" rel="homepage">homepage_link1</a>
+        <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
+        <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
+        """ % {'index_url': crawler.index_url}
+
+        # Test that the simple link matcher yields the right links.
+        generator = crawler._simple_link_matcher(content, crawler.index_url)
+        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
+                          crawler.index_url, True), next(generator))
+        self.assertEqual(('http://dl-link1', True), next(generator))
+        self.assertEqual(('%stest' % crawler.index_url, False),
+                         next(generator))
+        self.assertRaises(StopIteration, generator.__next__)
+
+        # Following external links is possible (e.g. homepages)
+        crawler.follow_externals = True
+        generator = crawler._simple_link_matcher(content, crawler.index_url)
+        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
+                          crawler.index_url, True), next(generator))
+        self.assertEqual(('http://dl-link1', True), next(generator))
+        self.assertEqual(('http://dl-link2', False), next(generator))
+        self.assertEqual(('%stest' % crawler.index_url, False),
+                         next(generator))
+        self.assertRaises(StopIteration, generator.__next__)
+
+    def test_browse_local_files(self):
+        # Test that we can browse local files
+        index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
+        if sys.platform == 'win32':
+            # under windows the correct syntax is:
+            #   file:///C|\the\path\here
+            # instead of
+            #   file://C:\the\path\here
+            fix = re.compile(r'^(file://)([A-Za-z])(:)')
+            index_url = fix.sub('\\1/\\2|', index_url)
+
+        index_path = os.sep.join([index_url, "test_found_links", "simple"])
+        crawler = Crawler(index_path)
+        dists = crawler.get_releases("foobar")
+        self.assertEqual(4, len(dists))
+
+    def test_get_link_matcher(self):
+        crawler = Crawler("http://example.org")
+        self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
+            "http://example.org/some/file").__name__)
+        self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
+            "http://other-url").__name__)
+
+    def test_default_link_matcher(self):
+        crawler = Crawler("http://example.org", mirrors=[])
+        crawler.follow_externals = True
+        crawler._is_browsable = lambda *args: True
+        base_url = "http://example.org/some/file/"
+        content = """
+<a href="../homepage" rel="homepage">link</a>
+<a href="../download" rel="download">link2</a>
+<a href="../simpleurl">link2</a>
+        """
+        found_links = set(uri for uri, _ in
+                          crawler._default_link_matcher(content, base_url))
+        self.assertIn('http://example.org/some/homepage', found_links)
+        self.assertIn('http://example.org/some/simpleurl', found_links)
+        self.assertIn('http://example.org/some/download', found_links)
+
+    @unittest.skipIf(_thread is None, 'needs threads')
+    @use_pypi_server("project_list")
+    def test_search_projects(self, server):
+        # we can search the index for some projects by name;
+        # the case used does not matter here
+        crawler = self._get_simple_crawler(server)
+        tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
+                 ('foobar*', ['FooBar-bar', 'Foobar-baz']),
+                 ('*foobar', ['Baz-FooBar']))
+
+        for search, expected in tests:
+            projects = [p.name for p in crawler.search_projects(search)]
+            self.assertListEqual(expected, projects)
+
+
+def test_suite():
+    return unittest.makeSuite(SimpleCrawlerTestCase)
+
+if __name__ == '__main__':
+    unittest.main(defaultTest="test_suite")