1 files changed, 158 insertions, 49 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 0035e7067f..9cbf8aaf5a 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -89,14 +89,16 @@ import http.client
 import io
 import os
 import posixpath
-import random
 import re
 import socket
 import sys
 import time
 import collections
+import tempfile
+import contextlib
 import warnings
 
+
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
     urlparse, urlsplit, urljoin, unwrap, quote, unquote,
@@ -112,21 +114,40 @@ except ImportError:
 else:
     _have_ssl = True
 
+__all__ = [
+    # Classes
+    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
+    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
+    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
+    'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
+    'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
+    'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+    'UnknownHandler', 'HTTPErrorProcessor',
+    # Functions
+    'urlopen', 'install_opener', 'build_opener',
+    'pathname2url', 'url2pathname', 'getproxies',
+    # Legacy interface
+    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
+]
+
 # used in User-Agent header sent
 __version__ = sys.version[:3]
 
 _opener = None
 def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
-            *, cafile=None, capath=None):
+            *, cafile=None, capath=None, cadefault=False):
     global _opener
-    if cafile or capath:
+    if cafile or capath or cadefault:
         if not _have_ssl:
             raise ValueError('SSL support not available')
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         context.options |= ssl.OP_NO_SSLv2
-        if cafile or capath:
+        if cafile or capath or cadefault:
             context.verify_mode = ssl.CERT_REQUIRED
-            context.load_verify_locations(cafile, capath)
+            if cafile or capath:
+                context.load_verify_locations(cafile, capath)
+            else:
+                context.set_default_verify_paths()
             check_hostname = True
         else:
             check_hostname = False
@@ -142,17 +163,78 @@ def install_opener(opener):
     global _opener
     _opener = opener
 
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []
 def urlretrieve(url, filename=None, reporthook=None, data=None):
-    global _urlopener
-    if not _urlopener:
-        _urlopener = FancyURLopener()
-    return _urlopener.retrieve(url, filename, reporthook, data)
+    """
+    Retrieve a URL into a temporary location on disk.
+
+    Requires a URL argument. If a filename is passed, it is used as
+    the temporary file location. The reporthook argument should be
+    a callable that accepts a block number, a read size, and the
+    total file size of the URL target. The data argument should be
+    valid URL encoded data.
+
+    If a filename is passed and the URL points to a local resource,
+    the result is a copy from local file to new file.
+
+    Returns a tuple containing the path to the newly created
+    data file as well as the resulting HTTPMessage object.
+    """
+    url_type, path = splittype(url)
+
+    with contextlib.closing(urlopen(url, data)) as fp:
+        headers = fp.info()
+
+        # Just return the local path and the "headers" for file://
+        # URLs. No sense in performing a copy unless requested.
+        if url_type == "file" and not filename:
+            return os.path.normpath(path), headers
+
+        # Handle temporary file setup.
+        if filename:
+            tfp = open(filename, 'wb')
+        else:
+            tfp = tempfile.NamedTemporaryFile(delete=False)
+            filename = tfp.name
+            _url_tempfiles.append(filename)
+
+        with tfp:
+            result = filename, headers
+            bs = 1024*8
+            size = -1
+            read = 0
+            blocknum = 0
+            if "content-length" in headers:
+                size = int(headers["Content-Length"])
+
+            if reporthook:
+                reporthook(blocknum, 0, size)
+
+            while True:
+                block = fp.read(bs)
+                if not block:
+                    break
+                read += len(block)
+                tfp.write(block)
+                blocknum += 1
+                if reporthook:
+                    reporthook(blocknum, len(block), size)
+
+    if size >= 0 and read < size:
+        raise ContentTooShortError(
+            "retrieval incomplete: got only %i out of %i bytes"
+            % (read, size), result)
+
+    return result
 
 def urlcleanup():
-    if _urlopener:
-        _urlopener.cleanup()
+    for temp_file in _url_tempfiles:
+        try:
+            os.unlink(temp_file)
+        except EnvironmentError:
+            pass
+
+    del _url_tempfiles[:]
     global _opener
     if _opener:
         _opener = None
@@ -178,7 +260,8 @@ def request_host(request):
 class Request:
 
     def __init__(self, url, data=None, headers={},
-                 origin_req_host=None, unverifiable=False):
+                 origin_req_host=None, unverifiable=False,
+                 method=None):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
         self.full_url = unwrap(url)
         self.full_url, self.fragment = splittag(self.full_url)
@@ -192,6 +275,7 @@ class Request:
             origin_req_host = request_host(self)
         self.origin_req_host = origin_req_host
         self.unverifiable = unverifiable
+        self.method = method
         self._parse()
 
     def _parse(self):
@@ -203,41 +287,60 @@ class Request:
             self.host = unquote(self.host)
 
     def get_method(self):
-        if self.data is not None:
+        """Return a string indicating the HTTP request method."""
+        if self.method is not None:
+            return self.method
+        elif self.data is not None:
             return "POST"
         else:
             return "GET"
 
+    def get_full_url(self):
+        if self.fragment:
+            return '%s#%s' % (self.full_url, self.fragment)
+        else:
+            return self.full_url
+
     # Begin deprecated methods
 
     def add_data(self, data):
+        msg = "Request.add_data method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         self.data = data
 
     def has_data(self):
+        msg = "Request.has_data method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.data is not None
 
     def get_data(self):
+        msg = "Request.get_data method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.data
 
-    def get_full_url(self):
-        if self.fragment:
-            return '%s#%s' % (self.full_url, self.fragment)
-        else:
-            return self.full_url
-
     def get_type(self):
+        msg = "Request.get_type method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.type
 
     def get_host(self):
+        msg = "Request.get_host method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.host
 
     def get_selector(self):
+        msg = "Request.get_selector method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.selector
 
     def is_unverifiable(self):
+        msg = "Request.is_unverifiable method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.unverifiable
 
     def get_origin_req_host(self):
+        msg = "Request.get_origin_req_host method is deprecated."
+        warnings.warn(msg, DeprecationWarning, stacklevel=1)
         return self.origin_req_host
 
     # End deprecated methods
@@ -825,17 +928,23 @@ class AbstractBasicAuthHandler:
             self.retried += 1
 
         if authreq:
-            mo = AbstractBasicAuthHandler.rx.search(authreq)
-            if mo:
-                scheme, quote, realm = mo.groups()
-                if quote not in ["'", '"']:
-                    warnings.warn("Basic Auth Realm was unquoted",
-                                  UserWarning, 2)
-                if scheme.lower() == 'basic':
-                    response = self.retry_http_basic_auth(host, req, realm)
-                    if response and response.code != 401:
-                        self.retried = 0
-                    return response
+            scheme = authreq.split()[0]
+            if scheme.lower() != 'basic':
+                raise ValueError("AbstractBasicAuthHandler does not"
+                                 " support the following scheme: '%s'" %
+                                 scheme)
+            else:
+                mo = AbstractBasicAuthHandler.rx.search(authreq)
+                if mo:
+                    scheme, quote, realm = mo.groups()
+                    if quote not in ['"',"'"]:
+                        warnings.warn("Basic Auth Realm was unquoted",
+                                      UserWarning, 2)
+                    if scheme.lower() == 'basic':
+                        response = self.retry_http_basic_auth(host, req, realm)
+                        if response and response.code != 401:
+                            self.retried = 0
+                        return response
 
     def retry_http_basic_auth(self, host, req, realm):
         user, pw = self.passwd.find_user_password(realm, host)
@@ -878,9 +987,9 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
         return response
 
 
-def randombytes(n):
-    """Return n random bytes."""
-    return os.urandom(n)
+# Return n random bytes.
+_randombytes = os.urandom
+
 
 class AbstractDigestAuthHandler:
     # Digest authentication is specified in RFC 2617.
@@ -921,6 +1030,9 @@ class AbstractDigestAuthHandler:
             scheme = authreq.split()[0]
             if scheme.lower() == 'digest':
                 return self.retry_http_digest_auth(req, authreq)
+            elif scheme.lower() != 'basic':
+                raise ValueError("AbstractDigestAuthHandler does not support"
+                                 " the following scheme: '%s'" % scheme)
 
     def retry_http_digest_auth(self, req, auth):
         token, challenge = auth.split(' ', 1)
@@ -941,7 +1053,7 @@ class AbstractDigestAuthHandler:
         # authentication, and to provide some message integrity protection.
         # This isn't a fabulous effort, but it's probably Good Enough.
         s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
-        b = s.encode("ascii") + randombytes(8)
+        b = s.encode("ascii") + _randombytes(8)
         dig = hashlib.sha1(b).hexdigest()
         return dig[:16]
 
@@ -1162,7 +1274,6 @@ class HTTPHandler(AbstractHTTPHandler):
     http_request = AbstractHTTPHandler.do_request_
 
 if hasattr(http.client, 'HTTPSConnection'):
-    import ssl
 
     class HTTPSHandler(AbstractHTTPHandler):
 
@@ -1177,6 +1288,8 @@ if hasattr(http.client, 'HTTPSConnection'):
 
         https_request = AbstractHTTPHandler.do_request_
 
+    __all__.append('HTTPSHandler')
+
 class HTTPCookieProcessor(BaseHandler):
     def __init__(self, cookiejar=None):
         import http.cookiejar
@@ -1463,6 +1576,9 @@ class URLopener:
 
     # Constructor
     def __init__(self, proxies=None, **x509):
+        msg = "%(class)s style of invoking requests is deprecated."\
+              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
+        warnings.warn(msg, DeprecationWarning, stacklevel=3)
         if proxies is None:
             proxies = getproxies()
         assert hasattr(proxies, 'keys'), "proxies must be a mapping"
@@ -1542,6 +1658,8 @@ class URLopener:
                 return getattr(self, name)(url)
             else:
                 return getattr(self, name)(url, data)
+        except HTTPError:
+            raise
         except socket.error as msg:
             raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
 
@@ -1662,7 +1780,6 @@ class URLopener:
                 if proxy_bypass(realhost):
                     host = realhost
 
-            #print "proxy via http:", host, selector
         if not host: raise IOError('http error', 'no host given')
 
         if proxy_passwd:
@@ -1761,8 +1878,8 @@ class URLopener:
 
     def open_local_file(self, url):
         """Use local file."""
-        import mimetypes, email.utils
-        from io import StringIO
+        import email.utils
+        import mimetypes
         host, file = splithost(url)
         localname = url2pathname(file)
         try:
@@ -1796,7 +1913,6 @@ class URLopener:
         if not isinstance(url, str):
             raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
         import mimetypes
-        from io import StringIO
         host, path = splithost(url)
         if not host: raise URLError('ftp error', 'no host given')
         host, port = splitport(host)
@@ -1879,7 +1995,7 @@ class URLopener:
         msg.append('Content-type: %s' % type)
         if encoding == 'base64':
             # XXX is this encoding/decoding ok?
-            data = base64.decodebytes(data.encode('ascii')).decode('latin1')
+            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
         else:
             data = unquote(data)
         msg.append('Content-Length: %d' % len(data))
@@ -1972,7 +2088,6 @@ class FancyURLopener(URLopener):
             URLopener.http_error_default(self, url, fp,
                                          errcode, errmsg, headers)
         stuff = headers['www-authenticate']
-        import re
         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
         if not match:
             URLopener.http_error_default(self, url, fp,
@@ -1998,7 +2113,6 @@ class FancyURLopener(URLopener):
             URLopener.http_error_default(self, url, fp,
                                          errcode, errmsg, headers)
         stuff = headers['proxy-authenticate']
-        import re
         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
         if not match:
             URLopener.http_error_default(self, url, fp,
@@ -2290,8 +2404,6 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
       'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
     }
     """
-    import re
-    import socket
     from fnmatch import fnmatch
 
     hostonly, port = splitport(host)
@@ -2394,7 +2506,6 @@ elif os.name == 'nt':
                     for p in proxyServer.split(';'):
                         protocol, address = p.split('=', 1)
                         # See if address has a type:// prefix
-                        import re
                         if not re.match('^([^/:]+)://', address):
                             address = '%s://%s' % (protocol, address)
                         proxies[protocol] = address
@@ -2426,7 +2537,6 @@ elif os.name == 'nt':
     def proxy_bypass_registry(host):
         try:
             import winreg
-            import re
         except ImportError:
             # Std modules, so should be around - but you never know!
             return 0
@@ -2470,7 +2580,6 @@ elif os.name == 'nt':
             test = test.replace("*", r".*")     # change glob sequence
             test = test.replace("?", r".")      # change glob char
             for val in host:
-                # print "%s <--> %s" %( test, val )
                 if re.match(test, val, re.I):
                     return 1
         return 0