1 files changed, 157 insertions, 201 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 1c69dcb5c7..80c80cf576 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -20,10 +20,12 @@ request. This diagram details these state transitions:
       | ( putheader() )*  endheaders()
       v
     Request-sent
-      |
-      | response = getresponse()
-      v
-    Unread-response   [Response-headers-read]
+      |\_____________________________
+      |                              | getresponse() raises
+      | response = getresponse()     | ConnectionError
+      v                              v
+    Unread-response                Idle
+    [Response-headers-read]
       |\____________________
       |                     |
       | response.read()     | putrequest()
@@ -68,6 +70,7 @@ Req-sent-unread-response       _CS_REQ_SENT       <response_class>
 
 import email.parser
 import email.message
+import http
 import io
 import os
 import re
@@ -82,7 +85,8 @@ __all__ = ["HTTPResponse", "HTTPConnection",
            "UnknownTransferEncoding", "UnimplementedFileMode",
            "IncompleteRead", "InvalidURL", "ImproperConnectionState",
            "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
-           "BadStatusLine", "LineTooLong", "error", "responses"]
+           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
+           "responses"]
 
 HTTP_PORT = 80
 HTTPS_PORT = 443
@@ -94,122 +98,13 @@ _CS_IDLE = 'Idle'
 _CS_REQ_STARTED = 'Request-started'
 _CS_REQ_SENT = 'Request-sent'
 
-# status codes
-# informational
-CONTINUE = 100
-SWITCHING_PROTOCOLS = 101
-PROCESSING = 102
-
-# successful
-OK = 200
-CREATED = 201
-ACCEPTED = 202
-NON_AUTHORITATIVE_INFORMATION = 203
-NO_CONTENT = 204
-RESET_CONTENT = 205
-PARTIAL_CONTENT = 206
-MULTI_STATUS = 207
-IM_USED = 226
-
-# redirection
-MULTIPLE_CHOICES = 300
-MOVED_PERMANENTLY = 301
-FOUND = 302
-SEE_OTHER = 303
-NOT_MODIFIED = 304
-USE_PROXY = 305
-TEMPORARY_REDIRECT = 307
-
-# client error
-BAD_REQUEST = 400
-UNAUTHORIZED = 401
-PAYMENT_REQUIRED = 402
-FORBIDDEN = 403
-NOT_FOUND = 404
-METHOD_NOT_ALLOWED = 405
-NOT_ACCEPTABLE = 406
-PROXY_AUTHENTICATION_REQUIRED = 407
-REQUEST_TIMEOUT = 408
-CONFLICT = 409
-GONE = 410
-LENGTH_REQUIRED = 411
-PRECONDITION_FAILED = 412
-REQUEST_ENTITY_TOO_LARGE = 413
-REQUEST_URI_TOO_LONG = 414
-UNSUPPORTED_MEDIA_TYPE = 415
-REQUESTED_RANGE_NOT_SATISFIABLE = 416
-EXPECTATION_FAILED = 417
-UNPROCESSABLE_ENTITY = 422
-LOCKED = 423
-FAILED_DEPENDENCY = 424
-UPGRADE_REQUIRED = 426
-PRECONDITION_REQUIRED = 428
-TOO_MANY_REQUESTS = 429
-REQUEST_HEADER_FIELDS_TOO_LARGE = 431
-
-# server error
-INTERNAL_SERVER_ERROR = 500
-NOT_IMPLEMENTED = 501
-BAD_GATEWAY = 502
-SERVICE_UNAVAILABLE = 503
-GATEWAY_TIMEOUT = 504
-HTTP_VERSION_NOT_SUPPORTED = 505
-INSUFFICIENT_STORAGE = 507
-NOT_EXTENDED = 510
-NETWORK_AUTHENTICATION_REQUIRED = 511
 
+# hack to maintain backwards compatibility
+globals().update(http.HTTPStatus.__members__)
+
+# another hack to maintain backwards compatibility
 # Mapping status codes to official W3C names
-responses = {
-    100: 'Continue',
-    101: 'Switching Protocols',
-
-    200: 'OK',
-    201: 'Created',
-    202: 'Accepted',
-    203: 'Non-Authoritative Information',
-    204: 'No Content',
-    205: 'Reset Content',
-    206: 'Partial Content',
-
-    300: 'Multiple Choices',
-    301: 'Moved Permanently',
-    302: 'Found',
-    303: 'See Other',
-    304: 'Not Modified',
-    305: 'Use Proxy',
-    306: '(Unused)',
-    307: 'Temporary Redirect',
-
-    400: 'Bad Request',
-    401: 'Unauthorized',
-    402: 'Payment Required',
-    403: 'Forbidden',
-    404: 'Not Found',
-    405: 'Method Not Allowed',
-    406: 'Not Acceptable',
-    407: 'Proxy Authentication Required',
-    408: 'Request Timeout',
-    409: 'Conflict',
-    410: 'Gone',
-    411: 'Length Required',
-    412: 'Precondition Failed',
-    413: 'Request Entity Too Large',
-    414: 'Request-URI Too Long',
-    415: 'Unsupported Media Type',
-    416: 'Requested Range Not Satisfiable',
-    417: 'Expectation Failed',
-    428: 'Precondition Required',
-    429: 'Too Many Requests',
-    431: 'Request Header Fields Too Large',
-
-    500: 'Internal Server Error',
-    501: 'Not Implemented',
-    502: 'Bad Gateway',
-    503: 'Service Unavailable',
-    504: 'Gateway Timeout',
-    505: 'HTTP Version Not Supported',
-    511: 'Network Authentication Required',
-}
+responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
 
 # maximal amount of data to read at one time in _safe_read
 MAXAMOUNT = 1048576
@@ -305,7 +200,7 @@ def parse_headers(fp, _class=HTTPMessage):
     return email.parser.Parser(_class=_class).parsestr(hstring)
 
 
-class HTTPResponse(io.RawIOBase):
+class HTTPResponse(io.BufferedIOBase):
 
     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 
@@ -353,7 +248,8 @@ class HTTPResponse(io.RawIOBase):
         if not line:
             # Presumably, the server closed the connection before
             # sending a valid response.
-            raise BadStatusLine(line)
+            raise RemoteDisconnected("Remote end closed connection without"
+                                     " response")
         try:
             version, status, reason = line.split(None, 2)
         except ValueError:
@@ -532,9 +428,10 @@ class HTTPResponse(io.RawIOBase):
             return b""
 
         if amt is not None:
-            # Amount is given, so call base class version
-            # (which is implemented in terms of self.readinto)
-            return super(HTTPResponse, self).read(amt)
+            # Amount is given, implement using readinto
+            b = bytearray(amt)
+            n = self.readinto(b)
+            return memoryview(b)[:n].tobytes()
         else:
             # Amount is not given (unbounded read) so we must check self.length
             # and self.chunked
@@ -614,71 +511,67 @@ class HTTPResponse(io.RawIOBase):
             if line in (b'\r\n', b'\n', b''):
                 break
 
+    def _get_chunk_left(self):
+        # return self.chunk_left, reading a new chunk if necessary.
+        # chunk_left == 0: at the end of the current chunk, need to close it
+        # chunk_left == None: No current chunk, should read next.
+        # Returns a non-zero byte count, or None once the last chunk has
+        # been read.
+        chunk_left = self.chunk_left
+        if not chunk_left: # Can be 0 or None
+            if chunk_left is not None:
+                # We are at the end of a chunk; discard the chunk terminator
+                self._safe_read(2)  # toss the CRLF at the end of the chunk
+            try:
+                chunk_left = self._read_next_chunk_size()
+            except ValueError:
+                raise IncompleteRead(b'')
+            if chunk_left == 0:
+                # last chunk: 1*("0") [ chunk-extension ] CRLF
+                self._read_and_discard_trailer()
+                # we read everything; close the "file"
+                self._close_conn()
+                chunk_left = None
+            self.chunk_left = chunk_left
+        return chunk_left
+
     def _readall_chunked(self):
         assert self.chunked != _UNKNOWN
-        chunk_left = self.chunk_left
         value = []
-        while True:
-            if chunk_left is None:
-                try:
-                    chunk_left = self._read_next_chunk_size()
-                    if chunk_left == 0:
-                        break
-                except ValueError:
-                    raise IncompleteRead(b''.join(value))
-            value.append(self._safe_read(chunk_left))
-
-            # we read the whole chunk, get another
-            self._safe_read(2)      # toss the CRLF at the end of the chunk
-            chunk_left = None
-
-        self._read_and_discard_trailer()
-
-        # we read everything; close the "file"
-        self._close_conn()
-
-        return b''.join(value)
+        try:
+            while True:
+                chunk_left = self._get_chunk_left()
+                if chunk_left is None:
+                    break
+                value.append(self._safe_read(chunk_left))
+                self.chunk_left = 0
+            return b''.join(value)
+        except IncompleteRead:
+            raise IncompleteRead(b''.join(value))
 
     def _readinto_chunked(self, b):
         assert self.chunked != _UNKNOWN
-        chunk_left = self.chunk_left
-
         total_bytes = 0
         mvb = memoryview(b)
-        while True:
-            if chunk_left is None:
-                try:
-                    chunk_left = self._read_next_chunk_size()
-                    if chunk_left == 0:
-                        break
-                except ValueError:
-                    raise IncompleteRead(bytes(b[0:total_bytes]))
-
-            if len(mvb) < chunk_left:
-                n = self._safe_readinto(mvb)
-                self.chunk_left = chunk_left - n
-                return total_bytes + n
-            elif len(mvb) == chunk_left:
-                n = self._safe_readinto(mvb)
-                self._safe_read(2)  # toss the CRLF at the end of the chunk
-                self.chunk_left = None
-                return total_bytes + n
-            else:
-                temp_mvb = mvb[0:chunk_left]
+        try:
+            while True:
+                chunk_left = self._get_chunk_left()
+                if chunk_left is None:
+                    return total_bytes
+
+                if len(mvb) <= chunk_left:
+                    n = self._safe_readinto(mvb)
+                    self.chunk_left = chunk_left - n
+                    return total_bytes + n
+
+                temp_mvb = mvb[:chunk_left]
                 n = self._safe_readinto(temp_mvb)
                 mvb = mvb[n:]
                 total_bytes += n
+                self.chunk_left = 0
 
-            # we read the whole chunk, get another
-            self._safe_read(2)      # toss the CRLF at the end of the chunk
-            chunk_left = None
-
-        self._read_and_discard_trailer()
-
-        # we read everything; close the "file"
-        self._close_conn()
-
-        return total_bytes
+        except IncompleteRead:
+            raise IncompleteRead(bytes(b[0:total_bytes]))
 
     def _safe_read(self, amt):
         """Read the number of bytes requested, compensating for partial reads.
@@ -719,6 +612,73 @@ class HTTPResponse(io.RawIOBase):
             total_bytes += n
         return total_bytes
 
+    def read1(self, n=-1):
+        """Read with at most one underlying system call.  If at least one
+        byte is buffered, return that instead.
+        """
+        if self.fp is None or self._method == "HEAD":
+            return b""
+        if self.chunked:
+            return self._read1_chunked(n)
+        try:
+            result = self.fp.read1(n)
+        except ValueError:
+            if n >= 0:
+                raise
+            # some implementations, like BufferedReader, don't support -1
+            # Read an arbitrarily selected largeish chunk.
+            result = self.fp.read1(16*1024)
+        if not result and n:
+            self._close_conn()
+        return result
+
+    def peek(self, n=-1):
+        # Having this enables IOBase.readline() to read more than one
+        # byte at a time
+        if self.fp is None or self._method == "HEAD":
+            return b""
+        if self.chunked:
+            return self._peek_chunked(n)
+        return self.fp.peek(n)
+
+    def readline(self, limit=-1):
+        if self.fp is None or self._method == "HEAD":
+            return b""
+        if self.chunked:
+            # Fallback to IOBase readline which uses peek() and read()
+            return super().readline(limit)
+        result = self.fp.readline(limit)
+        if not result and limit:
+            self._close_conn()
+        return result
+
+    def _read1_chunked(self, n):
+        # Strictly speaking, _get_chunk_left() may cause more than one read,
+        # but that is ok, since that is to satisfy the chunked protocol.
+        chunk_left = self._get_chunk_left()
+        if chunk_left is None or n == 0:
+            return b''
+        if not (0 <= n <= chunk_left):
+            n = chunk_left # if n is negative or larger than chunk_left
+        read = self.fp.read1(n)
+        self.chunk_left -= len(read)
+        if not read:
+            raise IncompleteRead(b"")
+        return read
+
+    def _peek_chunked(self, n):
+        # Strictly speaking, _get_chunk_left() may cause more than one read,
+        # but that is ok, since that is to satisfy the chunked protocol.
+        try:
+            chunk_left = self._get_chunk_left()
+        except IncompleteRead:
+            return b'' # peek doesn't worry about protocol
+        if chunk_left is None:
+            return b'' # eof
+        # peek is allowed to return more than requested.  Just request the
+        # entire chunk, and truncate what we get.
+        return self.fp.peek(chunk_left)[:chunk_left]
+
     def fileno(self):
         return self.fp.fileno()
 
@@ -762,14 +722,6 @@ class HTTPConnection:
     default_port = HTTP_PORT
     auto_open = 1
     debuglevel = 0
-    # TCP Maximum Segment Size (MSS) is determined by the TCP stack on
-    # a per-connection basis.  There is no simple and efficient
-    # platform independent mechanism for determining the MSS, so
-    # instead a reasonable estimate is chosen.  The getsockopt()
-    # interface using the TCP_MAXSEG parameter may be a suitable
-    # approach on some operating systems. A value of 16KiB is chosen
-    # as a reasonable estimate of the maximum MSS.
-    mss = 16384
 
     def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                  source_address=None):
@@ -851,7 +803,7 @@ class HTTPConnection:
         response = self.response_class(self.sock, method=self._method)
         (version, code, message) = response._read_status()
 
-        if code != 200:
+        if code != http.HTTPStatus.OK:
             self.close()
             raise OSError("Tunnel connection failed: %d %s" % (code,
                                                                message.strip()))
@@ -865,10 +817,14 @@ class HTTPConnection:
             if line in (b'\r\n', b'\n', b''):
                 break
 
+            if self.debuglevel > 0:
+                print('header:', line.decode())
+
     def connect(self):
         """Connect to the host and port specified in __init__."""
-        self.sock = self._create_connection((self.host,self.port),
-                                            self.timeout, self.source_address)
+        self.sock = self._create_connection(
+            (self.host,self.port), self.timeout, self.source_address)
+        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
 
         if self._tunnel_host:
             self._tunnel()
@@ -951,19 +907,9 @@ class HTTPConnection:
         self._buffer.extend((b"", b""))
         msg = b"\r\n".join(self._buffer)
         del self._buffer[:]
-        # If msg and message_body are sent in a single send() call,
-        # it will avoid performance problems caused by the interaction
-        # between delayed ack and the Nagle algorithm. However,
-        # there is no performance gain if the message is larger
-        # than MSS (and there is a memory penalty for the message
-        # copy).
-        if isinstance(message_body, bytes) and len(message_body) < self.mss:
-            msg += message_body
-            message_body = None
+
         self.send(msg)
         if message_body is not None:
-            # message_body was not a string (i.e. it is a file), and
-            # we must run the risk of Nagle.
             self.send(message_body)
 
     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@@ -1224,7 +1170,11 @@ class HTTPConnection:
             response = self.response_class(self.sock, method=self._method)
 
         try:
-            response.begin()
+            try:
+                response.begin()
+            except ConnectionError:
+                self.close()
+                raise
             assert response.will_close != _UNKNOWN
             self.__state = _CS_IDLE
 
@@ -1327,7 +1277,8 @@ class IncompleteRead(HTTPException):
             e = ', %i more expected' % self.expected
         else:
             e = ''
-        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
+        return '%s(%i bytes read%s)' % (self.__class__.__name__,
+                                        len(self.partial), e)
     def __str__(self):
         return repr(self)
 
@@ -1355,5 +1306,10 @@ class LineTooLong(HTTPException):
         HTTPException.__init__(self, "got more than %d bytes when reading %s"
                                      % (_MAXLINE, line_type))
 
+class RemoteDisconnected(ConnectionResetError, BadStatusLine):
+    def __init__(self, *pos, **kw):
+        BadStatusLine.__init__(self, "")
+        ConnectionResetError.__init__(self, *pos, **kw)
+
 # for backwards compatibility
 error = HTTPException