From 1d17079876f2e0b64a048eee2c1d11f1b4f7815e Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Wed, 24 Aug 2016 06:33:33 +0000 Subject: Issue #12319: Support for chunked encoding of HTTP request bodies When the body object is a file, its size is no longer determined with fstat(), since that can report the wrong result (e.g. reading from a pipe). Instead, determine the size using seek(), or fall back to chunked encoding for unseekable files. Also, change the logic for detecting text files to check for TextIOBase inheritance, rather than inspecting the 'mode' attribute, which may not exist (e.g. BytesIO and StringIO). The Content-Length for text files is no longer determined ahead of time, because the original logic could have been wrong depending on the codec and newline translation settings. Patch by Demian Brecht and Rolf Krahl, with a few tweaks by me. --- Lib/http/client.py | 199 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 149 insertions(+), 50 deletions(-) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index 763e1ef4f6..b242ba6559 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -795,6 +795,58 @@ class HTTPConnection: auto_open = 1 debuglevel = 0 + @staticmethod + def _is_textIO(stream): + """Test whether a file-like object is a text or a binary stream. + """ + return isinstance(stream, io.TextIOBase) + + @staticmethod + def _get_content_length(body, method): + """Get the content-length based on the body. + + If the body is "empty", we set Content-Length: 0 for methods + that expect a body (RFC 7230, Section 3.3.2). If the body is + set for other methods, we set the header provided we can + figure out what the length is. + """ + if not body: + # do an explicit check for not None here to distinguish + # between unset and set but empty + if method.upper() in _METHODS_EXPECTING_BODY or body is not None: + return 0 + else: + return None + + if hasattr(body, 'read'): + # file-like object. 
+ if HTTPConnection._is_textIO(body): + # text streams are unpredictable because it depends on + # character encoding and line ending translation. + return None + else: + # Is it seekable? + try: + curpos = body.tell() + sz = body.seek(0, io.SEEK_END) + except (TypeError, AttributeError, OSError): + return None + else: + body.seek(curpos) + return sz - curpos + + try: + # does it implement the buffer protocol (bytes, bytearray, array)? + mv = memoryview(body) + return mv.nbytes + except TypeError: + pass + + if isinstance(body, str): + return len(body) + + return None + def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): self.timeout = timeout @@ -933,18 +985,9 @@ class HTTPConnection: if hasattr(data, "read") : if self.debuglevel > 0: print("sendIng a read()able") - encode = False - try: - mode = data.mode - except AttributeError: - # io.BytesIO and other file-like objects don't have a `mode` - # attribute. - pass - else: - if "b" not in mode: - encode = True - if self.debuglevel > 0: - print("encoding file using iso-8859-1") + encode = self._is_textIO(data) + if encode and self.debuglevel > 0: + print("encoding file using iso-8859-1") while 1: datablock = data.read(blocksize) if not datablock: @@ -970,7 +1013,22 @@ class HTTPConnection: """ self._buffer.append(s) - def _send_output(self, message_body=None): + def _read_readable(self, readable): + blocksize = 8192 + if self.debuglevel > 0: + print("sendIng a read()able") + encode = self._is_textIO(readable) + if encode and self.debuglevel > 0: + print("encoding file using iso-8859-1") + while True: + datablock = readable.read(blocksize) + if not datablock: + break + if encode: + datablock = datablock.encode("iso-8859-1") + yield datablock + + def _send_output(self, message_body=None, encode_chunked=False): """Send the currently buffered request and clear the buffer. Appends an extra \\r\\n to the buffer. 
@@ -979,10 +1037,50 @@ class HTTPConnection: self._buffer.extend((b"", b"")) msg = b"\r\n".join(self._buffer) del self._buffer[:] - self.send(msg) + if message_body is not None: - self.send(message_body) + + # create a consistent interface to message_body + if hasattr(message_body, 'read'): + # Let file-like take precedence over byte-like. This + # is needed to allow the current position of mmap'ed + # files to be taken into account. + chunks = self._read_readable(message_body) + else: + try: + # this is solely to check to see if message_body + # implements the buffer API. it /would/ be easier + # to capture if PyObject_CheckBuffer was exposed + # to Python. + memoryview(message_body) + except TypeError: + try: + chunks = iter(message_body) + except TypeError: + raise TypeError("message_body should be a bytes-like " + "object or an iterable, got %r" + % type(message_body)) + else: + # the object implements the buffer interface and + # can be passed directly into socket methods + chunks = (message_body,) + + for chunk in chunks: + if not chunk: + if self.debuglevel > 0: + print('Zero length chunk ignored') + continue + + if encode_chunked and self._http_vsn == 11: + # chunked encoding + chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \ + + b'\r\n' + self.send(chunk) + + if encode_chunked and self._http_vsn == 11: + # end chunked transfer + self.send(b'0\r\n\r\n') def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): """Send a request to the server. @@ -1135,52 +1233,27 @@ class HTTPConnection: header = header + b': ' + value self._output(header) - def endheaders(self, message_body=None): + def endheaders(self, message_body=None, *, encode_chunked=False): """Indicate that the last header line has been sent to the server. This method sends the request to the server. The optional message_body argument can be used to pass a message body associated with the - request. 
The message body will be sent in the same packet as the - message headers if it is a string, otherwise it is sent as a separate - packet. + request. """ if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT else: raise CannotSendHeader() - self._send_output(message_body) + self._send_output(message_body, encode_chunked=encode_chunked) - def request(self, method, url, body=None, headers={}): + def request(self, method, url, body=None, headers={}, *, + encode_chunked=False): """Send a complete request to the server.""" - self._send_request(method, url, body, headers) - - def _set_content_length(self, body, method): - # Set the content-length based on the body. If the body is "empty", we - # set Content-Length: 0 for methods that expect a body (RFC 7230, - # Section 3.3.2). If the body is set for other methods, we set the - # header provided we can figure out what the length is. - thelen = None - method_expects_body = method.upper() in _METHODS_EXPECTING_BODY - if body is None and method_expects_body: - thelen = '0' - elif body is not None: - try: - thelen = str(len(body)) - except TypeError: - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") + self._send_request(method, url, body, headers, encode_chunked) - if thelen is not None: - self.putheader('Content-Length', thelen) - - def _send_request(self, method, url, body, headers): + def _send_request(self, method, url, body, headers, encode_chunked): # Honor explicitly requested Host: and Accept-Encoding: headers. 
- header_names = dict.fromkeys([k.lower() for k in headers]) + header_names = frozenset(k.lower() for k in headers) skips = {} if 'host' in header_names: skips['skip_host'] = 1 @@ -1189,15 +1262,41 @@ class HTTPConnection: self.putrequest(method, url, **skips) + # chunked encoding will happen if HTTP/1.1 is used and either + # the caller passes encode_chunked=True or the following + # conditions hold: + # 1. content-length has not been explicitly set + # 2. the length of the body cannot be determined + # (e.g. it is a generator or unseekable file) + # 3. Transfer-Encoding has NOT been explicitly set by the caller + if 'content-length' not in header_names: - self._set_content_length(body, method) + # only chunk body if not explicitly set for backwards + # compatibility, assuming the client code is already handling the + # chunking + if 'transfer-encoding' not in header_names: + # if content-length cannot be automatically determined, fall + # back to chunked encoding + encode_chunked = False + content_length = self._get_content_length(body, method) + if content_length is None: + if body: + if self.debuglevel > 0: + print('Unable to determine size of %r' % body) + encode_chunked = True + self.putheader('Transfer-Encoding', 'chunked') + else: + self.putheader('Content-Length', str(content_length)) + else: + encode_chunked = False + for hdr, value in headers.items(): self.putheader(hdr, value) if isinstance(body, str): # RFC 2616 Section 3.7.1 says that text default has a # default charset of iso-8859-1. body = _encode(body, 'body') - self.endheaders(body) + self.endheaders(body, encode_chunked=encode_chunked) def getresponse(self): """Get the response from the server. 
-- cgit v1.2.1 From 058e6a49c6e7b2847286eea5284a89b2757148d9 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Sat, 27 Aug 2016 01:39:26 +0000 Subject: Issue #12319: Always send file request bodies using chunked encoding The previous attempt to determine the file's Content-Length gave a false positive for pipes on Windows. Also, drop the special case for sending zero-length iterable bodies. --- Lib/http/client.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index b242ba6559..9d5cf4518f 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -805,35 +805,21 @@ class HTTPConnection: def _get_content_length(body, method): """Get the content-length based on the body. - If the body is "empty", we set Content-Length: 0 for methods - that expect a body (RFC 7230, Section 3.3.2). If the body is - set for other methods, we set the header provided we can - figure out what the length is. + If the body is None, we set Content-Length: 0 for methods that expect + a body (RFC 7230, Section 3.3.2). We also set the Content-Length for + any method if the body is a str or bytes-like object and not a file. """ - if not body: + if body is None: # do an explicit check for not None here to distinguish # between unset and set but empty - if method.upper() in _METHODS_EXPECTING_BODY or body is not None: + if method.upper() in _METHODS_EXPECTING_BODY: return 0 else: return None if hasattr(body, 'read'): # file-like object. - if HTTPConnection._is_textIO(body): - # text streams are unpredictable because it depends on - # character encoding and line ending translation. - return None - else: - # Is it seekable? 
- try: - curpos = body.tell() - sz = body.seek(0, io.SEEK_END) - except (TypeError, AttributeError, OSError): - return None - else: - body.seek(curpos) - return sz - curpos + return None try: # does it implement the buffer protocol (bytes, bytearray, array)? @@ -1266,8 +1252,7 @@ class HTTPConnection: # the caller passes encode_chunked=True or the following # conditions hold: # 1. content-length has not been explicitly set - # 2. the length of the body cannot be determined - # (e.g. it is a generator or unseekable file) + # 2. the body is a file or iterable, but not a str or bytes-like # 3. Transfer-Encoding has NOT been explicitly set by the caller if 'content-length' not in header_names: @@ -1280,7 +1265,7 @@ class HTTPConnection: encode_chunked = False content_length = self._get_content_length(body, method) if content_length is None: - if body: + if body is not None: if self.debuglevel > 0: print('Unable to determine size of %r' % body) encode_chunked = True -- cgit v1.2.1 From 4846ba81c69ebc815b35a89ed54fef7b5ffb1417 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 30 Aug 2016 10:47:49 -0700 Subject: Issue #27895: Spelling fixes (Contributed by Ville Skyttä). --- Lib/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index 9d5cf4518f..9107412922 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -136,7 +136,7 @@ _MAXHEADERS = 100 # # VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1 -# the patterns for both name and value are more leniant than RFC +# the patterns for both name and value are more lenient than RFC # definitions to allow for backwards compatibility _is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search -- cgit v1.2.1 From 1d2b9e79f95d43e224e4a3e898b5e3c9bab30582 Mon Sep 17 00:00:00 2001 From: "Eric V. 
Smith" Date: Sat, 3 Sep 2016 10:43:20 -0400 Subject: Issue 27921: Remove backslash from another f-string. --- Lib/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index 9107412922..230bccec98 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1060,7 +1060,7 @@ class HTTPConnection: if encode_chunked and self._http_vsn == 11: # chunked encoding - chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \ + chunk = f'{len(chunk):X}''\r\n'.encode('ascii') + chunk \ + b'\r\n' self.send(chunk) -- cgit v1.2.1 From 03c8a473999b08710ae3661724cf1bf1d76bc555 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Thu, 8 Sep 2016 13:59:53 -0400 Subject: #27364: fix "incorrect" uses of escape character in the stdlib. And most of the tools. Patch by Emanual Barry, reviewed by me, Serhiy Storchaka, and Martin Panter. --- Lib/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index 230bccec98..a1c4ab9482 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1,4 +1,4 @@ -"""HTTP/1.1 client library +r"""HTTP/1.1 client library -- cgit v1.2.1 From 66bb1f875a7615c255128fb1e2fd6e9e50e4cefa Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Fri, 9 Sep 2016 21:56:20 -0400 Subject: Issue 27948: Allow backslashes in the literal string portion of f-strings, but not in the expressions. Also, require expressions to begin and end with literal curly braces. 
--- Lib/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index ad8f4104f4..6ee1913545 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1060,7 +1060,7 @@ class HTTPConnection: if encode_chunked and self._http_vsn == 11: # chunked encoding - chunk = f'{len(chunk):X}''\r\n'.encode('ascii') + chunk \ + chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \ + b'\r\n' self.send(chunk) -- cgit v1.2.1 From de56c23b9a770f4872dd5a89478b0b334cc7de86 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 10 Sep 2016 23:23:33 +0200 Subject: Issue #28022: Deprecate ssl-related arguments in favor of SSLContext. The deprecation include manual creation of SSLSocket and certfile/keyfile (or similar) in ftplib, httplib, imaplib, smtplib, poplib and urllib. ssl.wrap_socket() is not marked as deprecated yet. --- Lib/http/client.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Lib/http/client.py') diff --git a/Lib/http/client.py b/Lib/http/client.py index 6ee1913545..a8e59b9561 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1365,6 +1365,12 @@ else: check_hostname=None): super(HTTPSConnection, self).__init__(host, port, timeout, source_address) + if (key_file is not None or cert_file is not None or + check_hostname is not None): + import warnings + warnings.warn("key_file, cert_file and check_hostname are " + "deprecated, use a custom context instead.", + DeprecationWarning, 2) self.key_file = key_file self.cert_file = cert_file if context is None: -- cgit v1.2.1