summaryrefslogtreecommitdiff
path: root/Lib/urllib.py
diff options
context:
space:
mode:
authorJeremy Hylton <jeremy@alum.mit.edu>2007-08-04 19:22:00 +0000
committerJeremy Hylton <jeremy@alum.mit.edu>2007-08-04 19:22:00 +0000
commitc2ed618e611ce0dad0d0f98d4c42a82d2bec01ea (patch)
treebc614afeb900ffe2193d750a105ff93e908dd8f4 /Lib/urllib.py
parentf4043eb50cb796677d929ba93b2644204f93d32d (diff)
downloadcpython-c2ed618e611ce0dad0d0f98d4c42a82d2bec01ea.tar.gz
Change urllib to use HTTPConnection rather than old HTTP class.
The HTTP class is a backwards compatibility layer for the Python 1.5 API. (The only remaining use in the std library is xmlrpclib.) The current change makes urllib issue HTTP/1.0 requests with HTTPConnection, because is accesses HTTPResponse.fp directly instead of using the read() method. Using fp directly interacts poorly with persistent connections. There are probably better solutions than the current one, but this is a start.
Diffstat (limited to 'Lib/urllib.py')
-rw-r--r--Lib/urllib.py59
1 files changed, 35 insertions, 24 deletions
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 2037a9da32..d486bcc5dd 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -81,11 +81,13 @@ def urlopen(url, data=None, proxies=None):
return opener.open(url)
else:
return opener.open(url, data)
+
def urlretrieve(url, filename=None, reporthook=None, data=None):
global _urlopener
if not _urlopener:
_urlopener = FancyURLopener()
return _urlopener.retrieve(url, filename, reporthook, data)
+
def urlcleanup():
if _urlopener:
_urlopener.cleanup()
@@ -310,37 +312,44 @@ class URLopener:
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
- h = httplib.HTTP(host)
+ http_conn = httplib.HTTPConnection(host)
+ # XXX We should fix urllib so that it works with HTTP/1.1.
+ http_conn._http_vsn = 10
+ http_conn._http_vsn_str = "HTTP/1.0"
+
+ headers = {}
+ if proxy_auth:
+ headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
+ if auth:
+ headers["Authorization"] = "Basic %s" % auth
+ if realhost:
+ headers["Host"] = realhost
+ for header, value in self.addheaders:
+ headers[header] = value
+
if data is not None:
- h.putrequest('POST', selector)
- h.putheader('Content-Type', 'application/x-www-form-urlencoded')
- h.putheader('Content-Length', '%d' % len(data))
+ headers["Content-Type"] = "application/x-www-form-urlencoded"
+ http_conn.request("POST", selector, data, headers)
else:
- h.putrequest('GET', selector)
- if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
- if auth: h.putheader('Authorization', 'Basic %s' % auth)
- if realhost: h.putheader('Host', realhost)
- for args in self.addheaders: h.putheader(*args)
- h.endheaders()
- if data is not None:
- h.send(data)
- errcode, errmsg, headers = h.getreply()
- fp = h.getfile()
- if errcode == -1:
- if fp: fp.close()
+ http_conn.request("GET", selector, headers=headers)
+
+ try:
+ response = http_conn.getresponse()
+ except httplib.BadStatusLine:
# something went wrong with the HTTP status line
- raise IOError, ('http protocol error', 0,
- 'got a bad status line', None)
- if errcode == 200:
- return addinfourl(fp, headers, "http:" + url)
+ raise IOError('http protocol error', 0,
+ 'got a bad status line', None)
+
+ if response.status == 200:
+ return addinfourl(response.fp, response.msg, "http:" + url)
else:
- if data is None:
- return self.http_error(url, fp, errcode, errmsg, headers)
- else:
- return self.http_error(url, fp, errcode, errmsg, headers, data)
+ return self.http_error(
+ url, response.fp,
+ response.status, response.reason, response.msg, data)
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
"""Handle http errors.
+
Derived class can override this, or provide specific handlers
named http_error_DDD where DDD is the 3-digit error code."""
# First check if there's a specific handler for this error
@@ -872,6 +881,8 @@ class ftpwrapper:
class addbase:
"""Base class for addinfo and addclosehook."""
+ # XXX Add a method to expose the timeout on the underlying socket?
+
def __init__(self, fp):
self.fp = fp
self.read = self.fp.read