From 118db635966e5db43f83561ba7c15bf9b6095e74 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Wed, 12 Nov 2014 23:33:50 +1000 Subject: Close #19494: add urllib.request.HTTPBasicPriorAuthHandler This auth handler adds the Authorization header to the first HTTP request rather than waiting for an HTTP 401 Unauthorized response from the server as the default HTTPBasicAuthHandler does. This allows working with websites like https://api.github.com which do not follow the strict interpretation of the RFC, but rather the dicta at the end of section 2 of RFC 2617: > A client MAY preemptively send the corresponding Authorization > header with requests for resources in that space without receipt > of another challenge from the server. Similarly, when a client > sends a request to a proxy, it may reuse a userid and password in > the Proxy-Authorization header field without receiving another > challenge from the proxy server. See section 4 for security > considerations associated with Basic authentication. Patch by Matej Cepl. --- Lib/urllib/request.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index e0c8116373..36ae1ef461 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -916,6 +916,21 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): return response +class HTTPBasicPriorAuthHandler(HTTPBasicAuthHandler): + handler_order = 400 + + def http_request(self, req): + if not req.has_header('Authorization'): + user, passwd = self.passwd.find_user_password(None, req.host) + credentials = '{0}:{1}'.format(user, passwd).encode() + auth_str = base64.standard_b64encode(credentials).decode() + req.add_unredirected_header('Authorization', + 'Basic {}'.format(auth_str.strip())) + return req + + https_request = http_request + + # Return n random bytes. 
_randombytes = os.urandom -- cgit v1.2.1 From 4ee310b7b10d007f683f275da48e7be3a6358037 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Thu, 16 Apr 2015 16:36:18 -0400 Subject: #7159: generalize urllib prior auth support. This fix is a superset of the functionality introduced by the issue #19494 enhancement, and supersedes that fix. Instead of a new handler, we have a new password manager that tracks whether we should send the auth for a given uri. This allows us to say "always send", satisfying #19494, or track that we've succeeded in auth and send the creds right away on every *subsequent* request. The support for using the password manager is added to AbstractBasicAuth, which means the proxy handler also now can handle prior auth if passed the new password manager. Patch by Akshit Khurana, docs mostly by me. --- Lib/urllib/request.py | 78 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 18 deletions(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 2e436ecfda..eada0a9132 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -120,9 +120,10 @@ __all__ = [ 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', - 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', - 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', - 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', + 'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler', + 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler', + 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler', + 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', 'UnknownHandler', 'HTTPErrorProcessor', # Functions 'urlopen', 'install_opener', 'build_opener', @@ -835,6 +836,37 
@@ class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): return HTTPPasswordMgr.find_user_password(self, None, authuri) +class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm): + + def __init__(self, *args, **kwargs): + self.authenticated = {} + super().__init__(*args, **kwargs) + + def add_password(self, realm, uri, user, passwd, is_authenticated=False): + self.update_authenticated(uri, is_authenticated) + # Add a default for prior auth requests + if realm is not None: + super().add_password(None, uri, user, passwd) + super().add_password(realm, uri, user, passwd) + + def update_authenticated(self, uri, is_authenticated=False): + # uri could be a single URI or a sequence + if isinstance(uri, str): + uri = [uri] + + for default_port in True, False: + for u in uri: + reduced_uri = self.reduce_uri(u, default_port) + self.authenticated[reduced_uri] = is_authenticated + + def is_authenticated(self, authuri): + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uri in self.authenticated: + if self.is_suburi(uri, reduced_authuri): + return self.authenticated[uri] + + class AbstractBasicAuthHandler: # XXX this allows for multiple auth-schemes, but will stupidly pick @@ -889,6 +921,31 @@ class AbstractBasicAuthHandler: else: return None + def http_request(self, req): + if (not hasattr(self.passwd, 'is_authenticated') or + not self.passwd.is_authenticated(req.full_url)): + return req + + if not req.has_header('Authorization'): + user, passwd = self.passwd.find_user_password(None, req.full_url) + credentials = '{0}:{1}'.format(user, passwd).encode() + auth_str = base64.standard_b64encode(credentials).decode() + req.add_unredirected_header('Authorization', + 'Basic {}'.format(auth_str.strip())) + return req + + def http_response(self, req, response): + if hasattr(self.passwd, 'is_authenticated'): + if 200 <= response.code < 300: + self.passwd.update_authenticated(req.full_url, True) + else: + 
self.passwd.update_authenticated(req.full_url, False) + return response + + https_request = http_request + https_response = http_response + + class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): @@ -916,21 +973,6 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): return response -class HTTPBasicPriorAuthHandler(HTTPBasicAuthHandler): - handler_order = 400 - - def http_request(self, req): - if not req.has_header('Authorization'): - user, passwd = self.passwd.find_user_password(None, req.host) - credentials = '{0}:{1}'.format(user, passwd).encode() - auth_str = base64.standard_b64encode(credentials).decode() - req.add_unredirected_header('Authorization', - 'Basic {}'.format(auth_str.strip())) - return req - - https_request = http_request - - # Return n random bytes. _randombytes = os.urandom -- cgit v1.2.1 From e8a3563e9c434e35fb4e1880cc11dd4af6031ea4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 18 Aug 2015 00:35:52 -0700 Subject: Add missing docstring --- Lib/urllib/request.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index a7fd017e10..e6abf34fa0 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -138,6 +138,71 @@ __version__ = sys.version[:3] _opener = None def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, *, cafile=None, capath=None, cadefault=False, context=None): + '''Open the URL url, which can be either a string or a Request object. + + *data* must be a bytes object specifying additional data to be sent to the + server, or None if no such data is needed. data may also be an iterable + object and in that case Content-Length value must be specified in the + headers. Currently HTTP requests are the only ones that use data; the HTTP + request will be a POST instead of a GET when the data parameter is + provided. 
+ + *data* should be a buffer in the standard application/x-www-form-urlencoded + format. The urllib.parse.urlencode() function takes a mapping or sequence + of 2-tuples and returns a string in this format. It should be encoded to + bytes before being used as the data parameter. The charset parameter in + Content-Type header may be used to specify the encoding. If charset + parameter is not sent with the Content-Type header, the server following + the HTTP 1.1 recommendation may assume that the data is encoded in + ISO-8859-1 encoding. It is advisable to use charset parameter with encoding + used in Content-Type header with the Request. + + urllib.request module uses HTTP/1.1 and includes a "Connection:close" + header in its HTTP requests. + + The optional *timeout* parameter specifies a timeout in seconds for + blocking operations like the connection attempt (if not specified, the + global default timeout setting will be used). This only works for HTTP, + HTTPS and FTP connections. + + If *context* is specified, it must be a ssl.SSLContext instance describing + the various SSL options. See HTTPSConnection for more details. + + The optional *cafile* and *capath* parameters specify a set of trusted CA + certificates for HTTPS requests. cafile should point to a single file + containing a bundle of CA certificates, whereas capath should point to a + directory of hashed certificate files. More information can be found in + ssl.SSLContext.load_verify_locations(). + + The *cadefault* parameter is ignored. + + For http and https urls, this function returns a http.client.HTTPResponse + object which has the following HTTPResponse Objects methods. 
+ + For ftp, file, and data urls and requests explicitly handled by legacy + URLopener and FancyURLopener classes, this function returns a + urllib.response.addinfourl object which can work as context manager and has + methods such as: + + * geturl() — return the URL of the resource retrieved, commonly used to + determine if a redirect was followed + + * info() — return the meta-information of the page, such as headers, in the + form of an email.message_from_string() instance (see Quick Reference to + HTTP Headers) + + * getcode() — return the HTTP status code of the response. Raises URLError + on errors. + + Note that *None* may be returned if no handler handles the request (though + the default installed global OpenerDirector uses UnknownHandler to ensure + this never happens). + + In addition, if proxy settings are detected (for example, when a *_proxy + environment variable like http_proxy is set), ProxyHandler is default + installed and makes sure the requests are handled through the proxy. + + ''' global _opener if cafile or capath or cadefault: if context is not None: -- cgit v1.2.1 From 5b9914ed5262a882872108edab1ff16ff7648e55 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Tue, 24 Nov 2015 23:00:37 +0000 Subject: Issue #25576: Apply fix to new urlopen() doc string --- Lib/urllib/request.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index e6abf34fa0..57d0dea075 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -149,13 +149,8 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, *data* should be a buffer in the standard application/x-www-form-urlencoded format. The urllib.parse.urlencode() function takes a mapping or sequence - of 2-tuples and returns a string in this format. It should be encoded to - bytes before being used as the data parameter. 
The charset parameter in - Content-Type header may be used to specify the encoding. If charset - parameter is not sent with the Content-Type header, the server following - the HTTP 1.1 recommendation may assume that the data is encoded in - ISO-8859-1 encoding. It is advisable to use charset parameter with encoding - used in Content-Type header with the Request. + of 2-tuples and returns an ASCII text string in this format. It should be + encoded to bytes before being used as the data parameter. urllib.request module uses HTTP/1.1 and includes a "Connection:close" header in its HTTP requests. -- cgit v1.2.1 From 279d27ab66ede3c7fdd1349e8834413e5b32a85e Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Thu, 4 Feb 2016 06:01:35 +0000 Subject: Issue #12923: Reset FancyURLopener's redirect counter even on exception Based on patches by Brian Brazil and Daniel Rocco. --- Lib/urllib/request.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index a7fd017e10..4c1651861b 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2050,18 +2050,20 @@ class FancyURLopener(URLopener): def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): """Error 302 -- relocated (temporarily).""" self.tries += 1 - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default + try: + if self.maxtries and self.tries >= self.maxtries: + if hasattr(self, "http_error_500"): + meth = self.http_error_500 + else: + meth = self.http_error_default + return meth(url, fp, 500, + "Internal Server Error: Redirect Recursion", + headers) + result = self.redirect_internal(url, fp, errcode, errmsg, + headers, data) + return result + finally: self.tries = 0 - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", headers) - result = 
self.redirect_internal(url, fp, errcode, errmsg, headers, - data) - self.tries = 0 - return result def redirect_internal(self, url, fp, errcode, errmsg, headers, data): if 'location' in headers: -- cgit v1.2.1 From 2b442f7d5b61dcefc5a7a18a6e7af9de5983790c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 11 Feb 2016 13:10:36 +0200 Subject: Issue #25985: sys.version_info is now used instead of sys.version to format short Python version. --- Lib/urllib/request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 4c2b9fe0e2..e3eed16131 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -133,7 +133,7 @@ __all__ = [ ] # used in User-Agent header sent -__version__ = sys.version[:3] +__version__ = '%d.%d' % sys.version_info[:2] _opener = None def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, -- cgit v1.2.1 From aecc893b71bf403c9b3abd4fc162a37104aedd68 Mon Sep 17 00:00:00 2001 From: Berker Peksag Date: Sun, 6 Mar 2016 16:16:40 +0200 Subject: Issue #2202: Fix UnboundLocalError in AbstractDigestAuthHandler.get_algorithm_impls Raise ValueError if algorithm is not MD5 or SHA. Initial patch by Mathieu Dupuy. --- Lib/urllib/request.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 4c1651861b..fc8ef7f91b 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1111,6 +1111,9 @@ class AbstractDigestAuthHandler: elif algorithm == 'SHA': H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest() # XXX MD5-sess + else: + raise ValueError("Unsupported digest authentication " + "algorithm %r" % algorithm) KD = lambda s, d: H("%s:%s" % (s, d)) return H, KD -- cgit v1.2.1