summary | refs | log | tree | commit | diff
path: root/Lib/http
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/http')
-rw-r--r-- Lib/http/client.py 241
-rw-r--r-- Lib/http/cookiejar.py 6
-rw-r--r-- Lib/http/cookies.py 5
-rw-r--r-- Lib/http/server.py 66
4 files changed, 223 insertions, 95 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 352c1017ad..a8e59b9561 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -1,4 +1,4 @@
-"""HTTP/1.1 client library
+r"""HTTP/1.1 client library
<intro stuff goes here>
<other stuff, too>
@@ -420,6 +420,7 @@ class HTTPResponse(io.BufferedIOBase):
self.fp.flush()
def readable(self):
+ """Always returns True"""
return True
# End of "raw stream" methods
@@ -467,6 +468,10 @@ class HTTPResponse(io.BufferedIOBase):
return s
def readinto(self, b):
+ """Read up to len(b) bytes into bytearray b and return the number
+ of bytes read.
+ """
+
if self.fp is None:
return 0
@@ -706,6 +711,17 @@ class HTTPResponse(io.BufferedIOBase):
return self.fp.fileno()
def getheader(self, name, default=None):
+ '''Returns the value of the header matching *name*.
+
+ If there are multiple matching headers, the values are
+ combined into a single string separated by commas and spaces.
+
+ If no matching header is found, returns *default* or None if
+ the *default* is not specified.
+
+ If the headers are unknown, raises http.client.ResponseNotReady.
+
+ '''
if self.headers is None:
raise ResponseNotReady()
headers = self.headers.get_all(name) or default
@@ -728,12 +744,45 @@ class HTTPResponse(io.BufferedIOBase):
# For compatibility with old-style urllib responses.
def info(self):
+ '''Returns an instance of the class mimetools.Message containing
+ meta-information associated with the URL.
+
+ When the method is HTTP, these headers are those returned by
+ the server at the head of the retrieved HTML page (including
+ Content-Length and Content-Type).
+
+ When the method is FTP, a Content-Length header will be
+ present if (as is now usual) the server passed back a file
+ length in response to the FTP retrieval request. A
+ Content-Type header will be present if the MIME type can be
+ guessed.
+
+ When the method is local-file, returned headers will include
+ a Date representing the file's last-modified time, a
+ Content-Length giving file size, and a Content-Type
+ containing a guess at the file's type. See also the
+ description of the mimetools module.
+
+ '''
return self.headers
def geturl(self):
+ '''Return the real URL of the page.
+
+ In some cases, the HTTP server redirects a client to another
+ URL. The urlopen() function handles this transparently, but in
+ some cases the caller needs to know which URL the client was
+ redirected to. The geturl() method can be used to get at this
+ redirected URL.
+
+ '''
return self.url
def getcode(self):
+ '''Return the HTTP status code that was sent with the response,
+ or None if the URL is not an HTTP URL.
+
+ '''
return self.status
class HTTPConnection:
@@ -746,6 +795,44 @@ class HTTPConnection:
auto_open = 1
debuglevel = 0
+ @staticmethod
+ def _is_textIO(stream):
+ """Test whether a file-like object is a text or a binary stream.
+ """
+ return isinstance(stream, io.TextIOBase)
+
+ @staticmethod
+ def _get_content_length(body, method):
+ """Get the content-length based on the body.
+
+ If the body is None, we set Content-Length: 0 for methods that expect
+ a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
+ any method if the body is a str or bytes-like object and not a file.
+ """
+ if body is None:
+ # do an explicit check for not None here to distinguish
+ # between unset and set but empty
+ if method.upper() in _METHODS_EXPECTING_BODY:
+ return 0
+ else:
+ return None
+
+ if hasattr(body, 'read'):
+ # file-like object.
+ return None
+
+ try:
+ # does it implement the buffer protocol (bytes, bytearray, array)?
+ mv = memoryview(body)
+ return mv.nbytes
+ except TypeError:
+ pass
+
+ if isinstance(body, str):
+ return len(body)
+
+ return None
+
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
self.timeout = timeout
@@ -884,18 +971,9 @@ class HTTPConnection:
if hasattr(data, "read") :
if self.debuglevel > 0:
print("sendIng a read()able")
- encode = False
- try:
- mode = data.mode
- except AttributeError:
- # io.BytesIO and other file-like objects don't have a `mode`
- # attribute.
- pass
- else:
- if "b" not in mode:
- encode = True
- if self.debuglevel > 0:
- print("encoding file using iso-8859-1")
+ encode = self._is_textIO(data)
+ if encode and self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
while 1:
datablock = data.read(blocksize)
if not datablock:
@@ -921,7 +999,22 @@ class HTTPConnection:
"""
self._buffer.append(s)
- def _send_output(self, message_body=None):
+ def _read_readable(self, readable):
+ blocksize = 8192
+ if self.debuglevel > 0:
+ print("sendIng a read()able")
+ encode = self._is_textIO(readable)
+ if encode and self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
+ while True:
+ datablock = readable.read(blocksize)
+ if not datablock:
+ break
+ if encode:
+ datablock = datablock.encode("iso-8859-1")
+ yield datablock
+
+ def _send_output(self, message_body=None, encode_chunked=False):
"""Send the currently buffered request and clear the buffer.
Appends an extra \\r\\n to the buffer.
@@ -930,10 +1023,50 @@ class HTTPConnection:
self._buffer.extend((b"", b""))
msg = b"\r\n".join(self._buffer)
del self._buffer[:]
-
self.send(msg)
+
if message_body is not None:
- self.send(message_body)
+
+ # create a consistent interface to message_body
+ if hasattr(message_body, 'read'):
+ # Let file-like take precedence over byte-like. This
+ # is needed to allow the current position of mmap'ed
+ # files to be taken into account.
+ chunks = self._read_readable(message_body)
+ else:
+ try:
+ # this is solely to check to see if message_body
+ # implements the buffer API. it /would/ be easier
+ # to capture if PyObject_CheckBuffer was exposed
+ # to Python.
+ memoryview(message_body)
+ except TypeError:
+ try:
+ chunks = iter(message_body)
+ except TypeError:
+ raise TypeError("message_body should be a bytes-like "
+ "object or an iterable, got %r"
+ % type(message_body))
+ else:
+ # the object implements the buffer interface and
+ # can be passed directly into socket methods
+ chunks = (message_body,)
+
+ for chunk in chunks:
+ if not chunk:
+ if self.debuglevel > 0:
+ print('Zero length chunk ignored')
+ continue
+
+ if encode_chunked and self._http_vsn == 11:
+ # chunked encoding
+ chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
+ + b'\r\n'
+ self.send(chunk)
+
+ if encode_chunked and self._http_vsn == 11:
+ # end chunked transfer
+ self.send(b'0\r\n\r\n')
def putrequest(self, method, url, skip_host=False,
skip_accept_encoding=False):
@@ -1087,52 +1220,27 @@ class HTTPConnection:
header = header + b': ' + value
self._output(header)
- def endheaders(self, message_body=None):
+ def endheaders(self, message_body=None, *, encode_chunked=False):
"""Indicate that the last header line has been sent to the server.
This method sends the request to the server. The optional message_body
argument can be used to pass a message body associated with the
- request. The message body will be sent in the same packet as the
- message headers if it is a string, otherwise it is sent as a separate
- packet.
+ request.
"""
if self.__state == _CS_REQ_STARTED:
self.__state = _CS_REQ_SENT
else:
raise CannotSendHeader()
- self._send_output(message_body)
+ self._send_output(message_body, encode_chunked=encode_chunked)
- def request(self, method, url, body=None, headers={}):
+ def request(self, method, url, body=None, headers={}, *,
+ encode_chunked=False):
"""Send a complete request to the server."""
- self._send_request(method, url, body, headers)
-
- def _set_content_length(self, body, method):
- # Set the content-length based on the body. If the body is "empty", we
- # set Content-Length: 0 for methods that expect a body (RFC 7230,
- # Section 3.3.2). If the body is set for other methods, we set the
- # header provided we can figure out what the length is.
- thelen = None
- method_expects_body = method.upper() in _METHODS_EXPECTING_BODY
- if body is None and method_expects_body:
- thelen = '0'
- elif body is not None:
- try:
- thelen = str(len(body))
- except TypeError:
- # If this is a file-like object, try to
- # fstat its file descriptor
- try:
- thelen = str(os.fstat(body.fileno()).st_size)
- except (AttributeError, OSError):
- # Don't send a length if this failed
- if self.debuglevel > 0: print("Cannot stat!!")
-
- if thelen is not None:
- self.putheader('Content-Length', thelen)
+ self._send_request(method, url, body, headers, encode_chunked)
- def _send_request(self, method, url, body, headers):
+ def _send_request(self, method, url, body, headers, encode_chunked):
# Honor explicitly requested Host: and Accept-Encoding: headers.
- header_names = dict.fromkeys([k.lower() for k in headers])
+ header_names = frozenset(k.lower() for k in headers)
skips = {}
if 'host' in header_names:
skips['skip_host'] = 1
@@ -1141,15 +1249,40 @@ class HTTPConnection:
self.putrequest(method, url, **skips)
+ # chunked encoding will happen if HTTP/1.1 is used and either
+ # the caller passes encode_chunked=True or the following
+ # conditions hold:
+ # 1. content-length has not been explicitly set
+ # 2. the body is a file or iterable, but not a str or bytes-like
+ # 3. Transfer-Encoding has NOT been explicitly set by the caller
+
if 'content-length' not in header_names:
- self._set_content_length(body, method)
+ # only chunk body if not explicitly set for backwards
+ # compatibility, assuming the client code is already handling the
+ # chunking
+ if 'transfer-encoding' not in header_names:
+ # if content-length cannot be automatically determined, fall
+ # back to chunked encoding
+ encode_chunked = False
+ content_length = self._get_content_length(body, method)
+ if content_length is None:
+ if body is not None:
+ if self.debuglevel > 0:
+ print('Unable to determine size of %r' % body)
+ encode_chunked = True
+ self.putheader('Transfer-Encoding', 'chunked')
+ else:
+ self.putheader('Content-Length', str(content_length))
+ else:
+ encode_chunked = False
+
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
# RFC 2616 Section 3.7.1 says that text default has a
# default charset of iso-8859-1.
body = _encode(body, 'body')
- self.endheaders(body)
+ self.endheaders(body, encode_chunked=encode_chunked)
def getresponse(self):
"""Get the response from the server.
@@ -1232,6 +1365,12 @@ else:
check_hostname=None):
super(HTTPSConnection, self).__init__(host, port, timeout,
source_address)
+ if (key_file is not None or cert_file is not None or
+ check_hostname is not None):
+ import warnings
+ warnings.warn("key_file, cert_file and check_hostname are "
+ "deprecated, use a custom context instead.",
+ DeprecationWarning, 2)
self.key_file = key_file
self.cert_file = cert_file
if context is None:
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
index 6d4572af03..adf956d66a 100644
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@@ -200,7 +200,7 @@ def _str2time(day, mon, yr, hr, min, sec, tz):
STRICT_DATE_RE = re.compile(
r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
- "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
+ r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
@@ -277,7 +277,7 @@ def http2time(text):
return _str2time(day, mon, yr, hr, min, sec, tz)
ISO_DATE_RE = re.compile(
- """^
+ r"""^
(\d{4}) # year
[-\/]?
(\d\d?) # numerical month
@@ -411,7 +411,7 @@ def split_header_words(header_values):
pairs = []
else:
# skip junk
- non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
+ non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
assert nr_junk_chars > 0, (
"split_header_words bug: '%s', '%s', %s" %
(orig_text, text, pairs))
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
index a73fe387f8..be3b080aa3 100644
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ -456,9 +456,8 @@ class Morsel(dict):
#
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
-_LegalValueChars = _LegalKeyChars + '\[\]'
+_LegalValueChars = _LegalKeyChars + r'\[\]'
_CookiePattern = re.compile(r"""
- (?x) # This is a verbose pattern
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
@@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
- """, re.ASCII) # May be removed if safe.
+ """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
# At long last, here is the cookie class. Using this class is almost just like
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 00620d1f85..e12e45bfc3 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -87,6 +87,7 @@ __all__ = [
"SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
+import email.utils
import html
import http.client
import io
@@ -126,9 +127,6 @@ DEFAULT_ERROR_MESSAGE = """\
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
-def _quote_html(html):
- return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
-
class HTTPServer(socketserver.TCPServer):
allow_reuse_address = 1 # Seems to make sense in testing environment
@@ -136,7 +134,7 @@ class HTTPServer(socketserver.TCPServer):
def server_bind(self):
"""Override server_bind to store the server name."""
socketserver.TCPServer.server_bind(self)
- host, port = self.socket.getsockname()[:2]
+ host, port = self.server_address[:2]
self.server_name = socket.getfqdn(host)
self.server_port = port
@@ -282,12 +280,9 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
words = requestline.split()
if len(words) == 3:
command, path, version = words
- if version[:5] != 'HTTP/':
- self.send_error(
- HTTPStatus.BAD_REQUEST,
- "Bad request version (%r)" % version)
- return False
try:
+ if version[:5] != 'HTTP/':
+ raise ValueError
base_version_number = version.split('/', 1)[1]
version_number = base_version_number.split(".")
# RFC 2145 section 3.1 says there can be only one "." and
@@ -309,7 +304,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
if version_number >= (2, 0):
self.send_error(
HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
- "Invalid HTTP Version (%s)" % base_version_number)
+ "Invalid HTTP version (%s)" % base_version_number)
return False
elif len(words) == 2:
command, path = words
@@ -332,10 +327,11 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
try:
self.headers = http.client.parse_headers(self.rfile,
_class=self.MessageClass)
- except http.client.LineTooLong:
+ except http.client.LineTooLong as err:
self.send_error(
- HTTPStatus.BAD_REQUEST,
- "Line too long")
+ HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
+ "Line too long",
+ str(err))
return False
except http.client.HTTPException as err:
self.send_error(
@@ -465,8 +461,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
# (see bug #1100201)
content = (self.error_message_format % {
'code': code,
- 'message': _quote_html(message),
- 'explain': _quote_html(explain)
+ 'message': html.escape(message, quote=False),
+ 'explain': html.escape(explain, quote=False)
})
body = content.encode('UTF-8', 'replace')
self.send_header("Content-Type", self.error_content_type)
@@ -491,12 +487,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
def send_response_only(self, code, message=None):
"""Send the response header only."""
- if message is None:
- if code in self.responses:
- message = self.responses[code][0]
- else:
- message = ''
if self.request_version != 'HTTP/0.9':
+ if message is None:
+ if code in self.responses:
+ message = self.responses[code][0]
+ else:
+ message = ''
if not hasattr(self, '_headers_buffer'):
self._headers_buffer = []
self._headers_buffer.append(("%s %d %s\r\n" %
@@ -583,12 +579,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
"""Return the current date and time formatted for a message header."""
if timestamp is None:
timestamp = time.time()
- year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
- s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
- self.weekdayname[wd],
- day, self.monthname[month], year,
- hh, mm, ss)
- return s
+ return email.utils.formatdate(timestamp, usegmt=True)
def log_date_time_string(self):
"""Return the current time formatted for logging."""
@@ -726,7 +717,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
errors='surrogatepass')
except UnicodeDecodeError:
displaypath = urllib.parse.unquote(path)
- displaypath = html.escape(displaypath)
+ displaypath = html.escape(displaypath, quote=False)
enc = sys.getfilesystemencoding()
title = 'Directory listing for %s' % displaypath
r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
@@ -750,7 +741,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
r.append('<li><a href="%s">%s</a></li>'
% (urllib.parse.quote(linkname,
errors='surrogatepass'),
- html.escape(displayname)))
+ html.escape(displayname, quote=False)))
r.append('</ul>\n<hr>\n</body>\n</html>\n')
encoded = '\n'.join(r).encode(enc, 'surrogateescape')
f = io.BytesIO()
@@ -1191,16 +1182,15 @@ def test(HandlerClass=BaseHTTPRequestHandler,
server_address = (bind, port)
HandlerClass.protocol_version = protocol
- httpd = ServerClass(server_address, HandlerClass)
-
- sa = httpd.socket.getsockname()
- print("Serving HTTP on", sa[0], "port", sa[1], "...")
- try:
- httpd.serve_forever()
- except KeyboardInterrupt:
- print("\nKeyboard interrupt received, exiting.")
- httpd.server_close()
- sys.exit(0)
+ with ServerClass(server_address, HandlerClass) as httpd:
+ sa = httpd.socket.getsockname()
+ serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..."
+ print(serve_message.format(host=sa[0], port=sa[1]))
+ try:
+ httpd.serve_forever()
+ except KeyboardInterrupt:
+ print("\nKeyboard interrupt received, exiting.")
+ sys.exit(0)
if __name__ == '__main__':
parser = argparse.ArgumentParser()