summaryrefslogtreecommitdiff
path: root/Lib/http/client.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/http/client.py')
-rw-r--r--Lib/http/client.py358
1 files changed, 157 insertions, 201 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 1c69dcb5c7..80c80cf576 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -20,10 +20,12 @@ request. This diagram details these state transitions:
| ( putheader() )* endheaders()
v
Request-sent
- |
- | response = getresponse()
- v
- Unread-response [Response-headers-read]
+ |\_____________________________
+ | | getresponse() raises
+ | response = getresponse() | ConnectionError
+ v v
+ Unread-response Idle
+ [Response-headers-read]
|\____________________
| |
| response.read() | putrequest()
@@ -68,6 +70,7 @@ Req-sent-unread-response _CS_REQ_SENT <response_class>
import email.parser
import email.message
+import http
import io
import os
import re
@@ -82,7 +85,8 @@ __all__ = ["HTTPResponse", "HTTPConnection",
"UnknownTransferEncoding", "UnimplementedFileMode",
"IncompleteRead", "InvalidURL", "ImproperConnectionState",
"CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
- "BadStatusLine", "LineTooLong", "error", "responses"]
+ "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
+ "responses"]
HTTP_PORT = 80
HTTPS_PORT = 443
@@ -94,122 +98,13 @@ _CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
-# status codes
-# informational
-CONTINUE = 100
-SWITCHING_PROTOCOLS = 101
-PROCESSING = 102
-
-# successful
-OK = 200
-CREATED = 201
-ACCEPTED = 202
-NON_AUTHORITATIVE_INFORMATION = 203
-NO_CONTENT = 204
-RESET_CONTENT = 205
-PARTIAL_CONTENT = 206
-MULTI_STATUS = 207
-IM_USED = 226
-
-# redirection
-MULTIPLE_CHOICES = 300
-MOVED_PERMANENTLY = 301
-FOUND = 302
-SEE_OTHER = 303
-NOT_MODIFIED = 304
-USE_PROXY = 305
-TEMPORARY_REDIRECT = 307
-
-# client error
-BAD_REQUEST = 400
-UNAUTHORIZED = 401
-PAYMENT_REQUIRED = 402
-FORBIDDEN = 403
-NOT_FOUND = 404
-METHOD_NOT_ALLOWED = 405
-NOT_ACCEPTABLE = 406
-PROXY_AUTHENTICATION_REQUIRED = 407
-REQUEST_TIMEOUT = 408
-CONFLICT = 409
-GONE = 410
-LENGTH_REQUIRED = 411
-PRECONDITION_FAILED = 412
-REQUEST_ENTITY_TOO_LARGE = 413
-REQUEST_URI_TOO_LONG = 414
-UNSUPPORTED_MEDIA_TYPE = 415
-REQUESTED_RANGE_NOT_SATISFIABLE = 416
-EXPECTATION_FAILED = 417
-UNPROCESSABLE_ENTITY = 422
-LOCKED = 423
-FAILED_DEPENDENCY = 424
-UPGRADE_REQUIRED = 426
-PRECONDITION_REQUIRED = 428
-TOO_MANY_REQUESTS = 429
-REQUEST_HEADER_FIELDS_TOO_LARGE = 431
-
-# server error
-INTERNAL_SERVER_ERROR = 500
-NOT_IMPLEMENTED = 501
-BAD_GATEWAY = 502
-SERVICE_UNAVAILABLE = 503
-GATEWAY_TIMEOUT = 504
-HTTP_VERSION_NOT_SUPPORTED = 505
-INSUFFICIENT_STORAGE = 507
-NOT_EXTENDED = 510
-NETWORK_AUTHENTICATION_REQUIRED = 511
+# hack to maintain backwards compatibility
+globals().update(http.HTTPStatus.__members__)
+
+# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
-responses = {
- 100: 'Continue',
- 101: 'Switching Protocols',
-
- 200: 'OK',
- 201: 'Created',
- 202: 'Accepted',
- 203: 'Non-Authoritative Information',
- 204: 'No Content',
- 205: 'Reset Content',
- 206: 'Partial Content',
-
- 300: 'Multiple Choices',
- 301: 'Moved Permanently',
- 302: 'Found',
- 303: 'See Other',
- 304: 'Not Modified',
- 305: 'Use Proxy',
- 306: '(Unused)',
- 307: 'Temporary Redirect',
-
- 400: 'Bad Request',
- 401: 'Unauthorized',
- 402: 'Payment Required',
- 403: 'Forbidden',
- 404: 'Not Found',
- 405: 'Method Not Allowed',
- 406: 'Not Acceptable',
- 407: 'Proxy Authentication Required',
- 408: 'Request Timeout',
- 409: 'Conflict',
- 410: 'Gone',
- 411: 'Length Required',
- 412: 'Precondition Failed',
- 413: 'Request Entity Too Large',
- 414: 'Request-URI Too Long',
- 415: 'Unsupported Media Type',
- 416: 'Requested Range Not Satisfiable',
- 417: 'Expectation Failed',
- 428: 'Precondition Required',
- 429: 'Too Many Requests',
- 431: 'Request Header Fields Too Large',
-
- 500: 'Internal Server Error',
- 501: 'Not Implemented',
- 502: 'Bad Gateway',
- 503: 'Service Unavailable',
- 504: 'Gateway Timeout',
- 505: 'HTTP Version Not Supported',
- 511: 'Network Authentication Required',
-}
+responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
@@ -305,7 +200,7 @@ def parse_headers(fp, _class=HTTPMessage):
return email.parser.Parser(_class=_class).parsestr(hstring)
-class HTTPResponse(io.RawIOBase):
+class HTTPResponse(io.BufferedIOBase):
# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
@@ -353,7 +248,8 @@ class HTTPResponse(io.RawIOBase):
if not line:
# Presumably, the server closed the connection before
# sending a valid response.
- raise BadStatusLine(line)
+ raise RemoteDisconnected("Remote end closed connection without"
+ " response")
try:
version, status, reason = line.split(None, 2)
except ValueError:
@@ -532,9 +428,10 @@ class HTTPResponse(io.RawIOBase):
return b""
if amt is not None:
- # Amount is given, so call base class version
- # (which is implemented in terms of self.readinto)
- return super(HTTPResponse, self).read(amt)
+ # Amount is given, implement using readinto
+ b = bytearray(amt)
+ n = self.readinto(b)
+ return memoryview(b)[:n].tobytes()
else:
# Amount is not given (unbounded read) so we must check self.length
# and self.chunked
@@ -614,71 +511,67 @@ class HTTPResponse(io.RawIOBase):
if line in (b'\r\n', b'\n', b''):
break
+ def _get_chunk_left(self):
+ # return self.chunk_left, reading a new chunk if necessary.
+ # chunk_left == 0: at the end of the current chunk, need to close it
+ # chunk_left == None: No current chunk, should read next.
+ # This function returns non-zero or None if the last chunk has
+ # been read.
+ chunk_left = self.chunk_left
+ if not chunk_left: # Can be 0 or None
+ if chunk_left is not None:
+ # We are at the end of chunk. dicard chunk end
+ self._safe_read(2) # toss the CRLF at the end of the chunk
+ try:
+ chunk_left = self._read_next_chunk_size()
+ except ValueError:
+ raise IncompleteRead(b'')
+ if chunk_left == 0:
+ # last chunk: 1*("0") [ chunk-extension ] CRLF
+ self._read_and_discard_trailer()
+ # we read everything; close the "file"
+ self._close_conn()
+ chunk_left = None
+ self.chunk_left = chunk_left
+ return chunk_left
+
def _readall_chunked(self):
assert self.chunked != _UNKNOWN
- chunk_left = self.chunk_left
value = []
- while True:
- if chunk_left is None:
- try:
- chunk_left = self._read_next_chunk_size()
- if chunk_left == 0:
- break
- except ValueError:
- raise IncompleteRead(b''.join(value))
- value.append(self._safe_read(chunk_left))
-
- # we read the whole chunk, get another
- self._safe_read(2) # toss the CRLF at the end of the chunk
- chunk_left = None
-
- self._read_and_discard_trailer()
-
- # we read everything; close the "file"
- self._close_conn()
-
- return b''.join(value)
+ try:
+ while True:
+ chunk_left = self._get_chunk_left()
+ if chunk_left is None:
+ break
+ value.append(self._safe_read(chunk_left))
+ self.chunk_left = 0
+ return b''.join(value)
+ except IncompleteRead:
+ raise IncompleteRead(b''.join(value))
def _readinto_chunked(self, b):
assert self.chunked != _UNKNOWN
- chunk_left = self.chunk_left
-
total_bytes = 0
mvb = memoryview(b)
- while True:
- if chunk_left is None:
- try:
- chunk_left = self._read_next_chunk_size()
- if chunk_left == 0:
- break
- except ValueError:
- raise IncompleteRead(bytes(b[0:total_bytes]))
-
- if len(mvb) < chunk_left:
- n = self._safe_readinto(mvb)
- self.chunk_left = chunk_left - n
- return total_bytes + n
- elif len(mvb) == chunk_left:
- n = self._safe_readinto(mvb)
- self._safe_read(2) # toss the CRLF at the end of the chunk
- self.chunk_left = None
- return total_bytes + n
- else:
- temp_mvb = mvb[0:chunk_left]
+ try:
+ while True:
+ chunk_left = self._get_chunk_left()
+ if chunk_left is None:
+ return total_bytes
+
+ if len(mvb) <= chunk_left:
+ n = self._safe_readinto(mvb)
+ self.chunk_left = chunk_left - n
+ return total_bytes + n
+
+ temp_mvb = mvb[:chunk_left]
n = self._safe_readinto(temp_mvb)
mvb = mvb[n:]
total_bytes += n
+ self.chunk_left = 0
- # we read the whole chunk, get another
- self._safe_read(2) # toss the CRLF at the end of the chunk
- chunk_left = None
-
- self._read_and_discard_trailer()
-
- # we read everything; close the "file"
- self._close_conn()
-
- return total_bytes
+ except IncompleteRead:
+ raise IncompleteRead(bytes(b[0:total_bytes]))
def _safe_read(self, amt):
"""Read the number of bytes requested, compensating for partial reads.
@@ -719,6 +612,73 @@ class HTTPResponse(io.RawIOBase):
total_bytes += n
return total_bytes
+ def read1(self, n=-1):
+ """Read with at most one underlying system call. If at least one
+ byte is buffered, return that instead.
+ """
+ if self.fp is None or self._method == "HEAD":
+ return b""
+ if self.chunked:
+ return self._read1_chunked(n)
+ try:
+ result = self.fp.read1(n)
+ except ValueError:
+ if n >= 0:
+ raise
+ # some implementations, like BufferedReader, don't support -1
+ # Read an arbitrarily selected largeish chunk.
+ result = self.fp.read1(16*1024)
+ if not result and n:
+ self._close_conn()
+ return result
+
+ def peek(self, n=-1):
+ # Having this enables IOBase.readline() to read more than one
+ # byte at a time
+ if self.fp is None or self._method == "HEAD":
+ return b""
+ if self.chunked:
+ return self._peek_chunked(n)
+ return self.fp.peek(n)
+
+ def readline(self, limit=-1):
+ if self.fp is None or self._method == "HEAD":
+ return b""
+ if self.chunked:
+ # Fallback to IOBase readline which uses peek() and read()
+ return super().readline(limit)
+ result = self.fp.readline(limit)
+ if not result and limit:
+ self._close_conn()
+ return result
+
+ def _read1_chunked(self, n):
+ # Strictly speaking, _get_chunk_left() may cause more than one read,
+ # but that is ok, since that is to satisfy the chunked protocol.
+ chunk_left = self._get_chunk_left()
+ if chunk_left is None or n == 0:
+ return b''
+ if not (0 <= n <= chunk_left):
+ n = chunk_left # if n is negative or larger than chunk_left
+ read = self.fp.read1(n)
+ self.chunk_left -= len(read)
+ if not read:
+ raise IncompleteRead(b"")
+ return read
+
+ def _peek_chunked(self, n):
+ # Strictly speaking, _get_chunk_left() may cause more than one read,
+ # but that is ok, since that is to satisfy the chunked protocol.
+ try:
+ chunk_left = self._get_chunk_left()
+ except IncompleteRead:
+ return b'' # peek doesn't worry about protocol
+ if chunk_left is None:
+ return b'' # eof
+ # peek is allowed to return more than requested. Just request the
+ # entire chunk, and truncate what we get.
+ return self.fp.peek(chunk_left)[:chunk_left]
+
def fileno(self):
return self.fp.fileno()
@@ -762,14 +722,6 @@ class HTTPConnection:
default_port = HTTP_PORT
auto_open = 1
debuglevel = 0
- # TCP Maximum Segment Size (MSS) is determined by the TCP stack on
- # a per-connection basis. There is no simple and efficient
- # platform independent mechanism for determining the MSS, so
- # instead a reasonable estimate is chosen. The getsockopt()
- # interface using the TCP_MAXSEG parameter may be a suitable
- # approach on some operating systems. A value of 16KiB is chosen
- # as a reasonable estimate of the maximum MSS.
- mss = 16384
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
@@ -851,7 +803,7 @@ class HTTPConnection:
response = self.response_class(self.sock, method=self._method)
(version, code, message) = response._read_status()
- if code != 200:
+ if code != http.HTTPStatus.OK:
self.close()
raise OSError("Tunnel connection failed: %d %s" % (code,
message.strip()))
@@ -865,10 +817,14 @@ class HTTPConnection:
if line in (b'\r\n', b'\n', b''):
break
+ if self.debuglevel > 0:
+ print('header:', line.decode())
+
def connect(self):
"""Connect to the host and port specified in __init__."""
- self.sock = self._create_connection((self.host,self.port),
- self.timeout, self.source_address)
+ self.sock = self._create_connection(
+ (self.host,self.port), self.timeout, self.source_address)
+ self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
if self._tunnel_host:
self._tunnel()
@@ -951,19 +907,9 @@ class HTTPConnection:
self._buffer.extend((b"", b""))
msg = b"\r\n".join(self._buffer)
del self._buffer[:]
- # If msg and message_body are sent in a single send() call,
- # it will avoid performance problems caused by the interaction
- # between delayed ack and the Nagle algorithm. However,
- # there is no performance gain if the message is larger
- # than MSS (and there is a memory penalty for the message
- # copy).
- if isinstance(message_body, bytes) and len(message_body) < self.mss:
- msg += message_body
- message_body = None
+
self.send(msg)
if message_body is not None:
- # message_body was not a string (i.e. it is a file), and
- # we must run the risk of Nagle.
self.send(message_body)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@@ -1224,7 +1170,11 @@ class HTTPConnection:
response = self.response_class(self.sock, method=self._method)
try:
- response.begin()
+ try:
+ response.begin()
+ except ConnectionError:
+ self.close()
+ raise
assert response.will_close != _UNKNOWN
self.__state = _CS_IDLE
@@ -1327,7 +1277,8 @@ class IncompleteRead(HTTPException):
e = ', %i more expected' % self.expected
else:
e = ''
- return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
+ return '%s(%i bytes read%s)' % (self.__class__.__name__,
+ len(self.partial), e)
def __str__(self):
return repr(self)
@@ -1355,5 +1306,10 @@ class LineTooLong(HTTPException):
HTTPException.__init__(self, "got more than %d bytes when reading %s"
% (_MAXLINE, line_type))
+class RemoteDisconnected(ConnectionResetError, BadStatusLine):
+ def __init__(self, *pos, **kw):
+ BadStatusLine.__init__(self, "")
+ ConnectionResetError.__init__(self, *pos, **kw)
+
# for backwards compatibility
error = HTTPException