diff options
author | Mariatta Wijaya <mariatta.wijaya@gmail.com> | 2017-02-06 20:16:58 -0800 |
---|---|---|
committer | Mariatta Wijaya <mariatta.wijaya@gmail.com> | 2017-02-06 20:16:58 -0800 |
commit | da79bcf8ac7ae72218ab023e1ed54390bc1a3a27 (patch) | |
tree | 74845e2dbd9521d9748b9c32f1922f4123083bf3 /Lib/http | |
parent | e3c7e835bdfc97750eb9b7fc0ad2493108c2d438 (diff) | |
parent | 1fe806ac56f8b83694d24ab604eb695d00bc8497 (diff) | |
download | cpython-da79bcf8ac7ae72218ab023e1ed54390bc1a3a27.tar.gz |
Issue #29371: merge with 3.5
Diffstat (limited to 'Lib/http')
-rw-r--r-- | Lib/http/client.py | 241 | ||||
-rw-r--r-- | Lib/http/cookiejar.py | 6 | ||||
-rw-r--r-- | Lib/http/cookies.py | 5 | ||||
-rw-r--r-- | Lib/http/server.py | 66 |
4 files changed, 223 insertions, 95 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py index 352c1017ad..a8e59b9561 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1,4 +1,4 @@ -"""HTTP/1.1 client library +r"""HTTP/1.1 client library <intro stuff goes here> <other stuff, too> @@ -420,6 +420,7 @@ class HTTPResponse(io.BufferedIOBase): self.fp.flush() def readable(self): + """Always returns True""" return True # End of "raw stream" methods @@ -467,6 +468,10 @@ class HTTPResponse(io.BufferedIOBase): return s def readinto(self, b): + """Read up to len(b) bytes into bytearray b and return the number + of bytes read. + """ + if self.fp is None: return 0 @@ -706,6 +711,17 @@ class HTTPResponse(io.BufferedIOBase): return self.fp.fileno() def getheader(self, name, default=None): + '''Returns the value of the header matching *name*. + + If there are multiple matching headers, the values are + combined into a single string separated by commas and spaces. + + If no matching header is found, returns *default* or None if + the *default* is not specified. + + If the headers are unknown, raises http.client.ResponseNotReady. + + ''' if self.headers is None: raise ResponseNotReady() headers = self.headers.get_all(name) or default @@ -728,12 +744,45 @@ class HTTPResponse(io.BufferedIOBase): # For compatibility with old-style urllib responses. def info(self): + '''Returns an instance of the class mimetools.Message containing + meta-information associated with the URL. + + When the method is HTTP, these headers are those returned by + the server at the head of the retrieved HTML page (including + Content-Length and Content-Type). + + When the method is FTP, a Content-Length header will be + present if (as is now usual) the server passed back a file + length in response to the FTP retrieval request. A + Content-Type header will be present if the MIME type can be + guessed. + + When the method is local-file, returned headers will include + a Date representing the file's last-modified time, a + Content-Length giving file size, and a Content-Type + containing a guess at the file's type. See also the + description of the mimetools module. + + ''' return self.headers def geturl(self): + '''Return the real URL of the page. + + In some cases, the HTTP server redirects a client to another + URL. The urlopen() function handles this transparently, but in + some cases the caller needs to know which URL the client was + redirected to. The geturl() method can be used to get at this + redirected URL. + + ''' return self.url def getcode(self): + '''Return the HTTP status code that was sent with the response, + or None if the URL is not an HTTP URL. + + ''' return self.status class HTTPConnection: @@ -746,6 +795,44 @@ class HTTPConnection: auto_open = 1 debuglevel = 0 + @staticmethod + def _is_textIO(stream): + """Test whether a file-like object is a text or a binary stream. + """ + return isinstance(stream, io.TextIOBase) + + @staticmethod + def _get_content_length(body, method): + """Get the content-length based on the body. + + If the body is None, we set Content-Length: 0 for methods that expect + a body (RFC 7230, Section 3.3.2). We also set the Content-Length for + any method if the body is a str or bytes-like object and not a file. + """ + if body is None: + # do an explicit check for not None here to distinguish + # between unset and set but empty + if method.upper() in _METHODS_EXPECTING_BODY: + return 0 + else: + return None + + if hasattr(body, 'read'): + # file-like object. + return None + + try: + # does it implement the buffer protocol (bytes, bytearray, array)? + mv = memoryview(body) + return mv.nbytes + except TypeError: + pass + + if isinstance(body, str): + return len(body) + + return None + def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): self.timeout = timeout @@ -884,18 +971,9 @@ class HTTPConnection: if hasattr(data, "read") : if self.debuglevel > 0: print("sendIng a read()able") - encode = False - try: - mode = data.mode - except AttributeError: - # io.BytesIO and other file-like objects don't have a `mode` - # attribute. - pass - else: - if "b" not in mode: - encode = True - if self.debuglevel > 0: - print("encoding file using iso-8859-1") + encode = self._is_textIO(data) + if encode and self.debuglevel > 0: + print("encoding file using iso-8859-1") while 1: datablock = data.read(blocksize) if not datablock: @@ -921,7 +999,22 @@ class HTTPConnection: """ self._buffer.append(s) - def _send_output(self, message_body=None): + def _read_readable(self, readable): + blocksize = 8192 + if self.debuglevel > 0: + print("sendIng a read()able") + encode = self._is_textIO(readable) + if encode and self.debuglevel > 0: + print("encoding file using iso-8859-1") + while True: + datablock = readable.read(blocksize) + if not datablock: + break + if encode: + datablock = datablock.encode("iso-8859-1") + yield datablock + + def _send_output(self, message_body=None, encode_chunked=False): """Send the currently buffered request and clear the buffer. Appends an extra \\r\\n to the buffer. @@ -930,10 +1023,50 @@ class HTTPConnection: self._buffer.extend((b"", b"")) msg = b"\r\n".join(self._buffer) del self._buffer[:] - self.send(msg) + if message_body is not None: - self.send(message_body) + + # create a consistent interface to message_body + if hasattr(message_body, 'read'): + # Let file-like take precedence over byte-like. This + # is needed to allow the current position of mmap'ed + # files to be taken into account. + chunks = self._read_readable(message_body) + else: + try: + # this is solely to check to see if message_body + # implements the buffer API. it /would/ be easier + # to capture if PyObject_CheckBuffer was exposed + # to Python. + memoryview(message_body) + except TypeError: + try: + chunks = iter(message_body) + except TypeError: + raise TypeError("message_body should be a bytes-like " + "object or an iterable, got %r" + % type(message_body)) + else: + # the object implements the buffer interface and + # can be passed directly into socket methods + chunks = (message_body,) + + for chunk in chunks: + if not chunk: + if self.debuglevel > 0: + print('Zero length chunk ignored') + continue + + if encode_chunked and self._http_vsn == 11: + # chunked encoding + chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \ + + b'\r\n' + self.send(chunk) + + if encode_chunked and self._http_vsn == 11: + # end chunked transfer + self.send(b'0\r\n\r\n') def putrequest(self, method, url, skip_host=False, skip_accept_encoding=False): @@ -1087,52 +1220,27 @@ class HTTPConnection: header = header + b': ' + value self._output(header) - def endheaders(self, message_body=None): + def endheaders(self, message_body=None, *, encode_chunked=False): """Indicate that the last header line has been sent to the server. This method sends the request to the server. The optional message_body argument can be used to pass a message body associated with the - request. The message body will be sent in the same packet as the - message headers if it is a string, otherwise it is sent as a separate - packet. + request. """ if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT else: raise CannotSendHeader() - self._send_output(message_body) + self._send_output(message_body, encode_chunked=encode_chunked) - def request(self, method, url, body=None, headers={}): + def request(self, method, url, body=None, headers={}, *, + encode_chunked=False): """Send a complete request to the server.""" - self._send_request(method, url, body, headers) - - def _set_content_length(self, body, method): - # Set the content-length based on the body. If the body is "empty", we - # set Content-Length: 0 for methods that expect a body (RFC 7230, - # Section 3.3.2). If the body is set for other methods, we set the - # header provided we can figure out what the length is. - thelen = None - method_expects_body = method.upper() in _METHODS_EXPECTING_BODY - if body is None and method_expects_body: - thelen = '0' - elif body is not None: - try: - thelen = str(len(body)) - except TypeError: - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") - - if thelen is not None: - self.putheader('Content-Length', thelen) + self._send_request(method, url, body, headers, encode_chunked) - def _send_request(self, method, url, body, headers): + def _send_request(self, method, url, body, headers, encode_chunked): # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) + header_names = frozenset(k.lower() for k in headers) skips = {} if 'host' in header_names: skips['skip_host'] = 1 @@ -1141,15 +1249,40 @@ class HTTPConnection: self.putrequest(method, url, **skips) + # chunked encoding will happen if HTTP/1.1 is used and either + # the caller passes encode_chunked=True or the following + # conditions hold: + # 1. content-length has not been explicitly set + # 2. the body is a file or iterable, but not a str or bytes-like + # 3. Transfer-Encoding has NOT been explicitly set by the caller + if 'content-length' not in header_names: - self._set_content_length(body, method) + # only chunk body if not explicitly set for backwards + # compatibility, assuming the client code is already handling the + # chunking + if 'transfer-encoding' not in header_names: + # if content-length cannot be automatically determined, fall + # back to chunked encoding + encode_chunked = False + content_length = self._get_content_length(body, method) + if content_length is None: + if body is not None: + if self.debuglevel > 0: + print('Unable to determine size of %r' % body) + encode_chunked = True + self.putheader('Transfer-Encoding', 'chunked') + else: + self.putheader('Content-Length', str(content_length)) + else: + encode_chunked = False + for hdr, value in headers.items(): self.putheader(hdr, value) if isinstance(body, str): # RFC 2616 Section 3.7.1 says that text default has a # default charset of iso-8859-1. body = _encode(body, 'body') - self.endheaders(body) + self.endheaders(body, encode_chunked=encode_chunked) def getresponse(self): """Get the response from the server. @@ -1232,6 +1365,12 @@ else: check_hostname=None): super(HTTPSConnection, self).__init__(host, port, timeout, source_address) + if (key_file is not None or cert_file is not None or + check_hostname is not None): + import warnings + warnings.warn("key_file, cert_file and check_hostname are " + "deprecated, use a custom context instead.", + DeprecationWarning, 2) self.key_file = key_file self.cert_file = cert_file if context is None: diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py index 6d4572af03..adf956d66a 100644 --- a/Lib/http/cookiejar.py +++ b/Lib/http/cookiejar.py @@ -200,7 +200,7 @@ def _str2time(day, mon, yr, hr, min, sec, tz): STRICT_DATE_RE = re.compile( r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) + r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) WEEKDAY_RE = re.compile( r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) LOOSE_HTTP_DATE_RE = re.compile( @@ -277,7 +277,7 @@ def http2time(text): return _str2time(day, mon, yr, hr, min, sec, tz) ISO_DATE_RE = re.compile( - """^ + r"""^ (\d{4}) # year [-\/]? (\d\d?) # numerical month @@ -411,7 +411,7 @@ def split_header_words(header_values): pairs = [] else: # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) + non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text) assert nr_junk_chars > 0, ( "split_header_words bug: '%s', '%s', %s" % (orig_text, text, pairs)) diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index a73fe387f8..be3b080aa3 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -456,9 +456,8 @@ class Morsel(dict): # _LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" -_LegalValueChars = _LegalKeyChars + '\[\]' +_LegalValueChars = _LegalKeyChars + r'\[\]' _CookiePattern = re.compile(r""" - (?x) # This is a verbose pattern \s* # Optional whitespace at start of cookie (?P<key> # Start of group 'key' [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter @@ -475,7 +474,7 @@ _CookiePattern = re.compile(r""" )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII) # May be removed if safe. + """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe. # At long last, here is the cookie class. Using this class is almost just like diff --git a/Lib/http/server.py b/Lib/http/server.py index 00620d1f85..e12e45bfc3 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -87,6 +87,7 @@ __all__ = [ "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", ] +import email.utils import html import http.client import io @@ -126,9 +127,6 @@ DEFAULT_ERROR_MESSAGE = """\ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" -def _quote_html(html): - return html.replace("&", "&").replace("<", "<").replace(">", ">") - class HTTPServer(socketserver.TCPServer): allow_reuse_address = 1 # Seems to make sense in testing environment @@ -136,7 +134,7 @@ class HTTPServer(socketserver.TCPServer): def server_bind(self): """Override server_bind to store the server name.""" socketserver.TCPServer.server_bind(self) - host, port = self.socket.getsockname()[:2] + host, port = self.server_address[:2] self.server_name = socket.getfqdn(host) self.server_port = port @@ -282,12 +280,9 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): words = requestline.split() if len(words) == 3: command, path, version = words - if version[:5] != 'HTTP/': - self.send_error( - HTTPStatus.BAD_REQUEST, - "Bad request version (%r)" % version) - return False try: + if version[:5] != 'HTTP/': + raise ValueError base_version_number = version.split('/', 1)[1] version_number = base_version_number.split(".") # RFC 2145 section 3.1 says there can be only one "." and @@ -309,7 +304,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): if version_number >= (2, 0): self.send_error( HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, - "Invalid HTTP Version (%s)" % base_version_number) + "Invalid HTTP version (%s)" % base_version_number) return False elif len(words) == 2: command, path = words @@ -332,10 +327,11 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): try: self.headers = http.client.parse_headers(self.rfile, _class=self.MessageClass) - except http.client.LineTooLong: + except http.client.LineTooLong as err: self.send_error( - HTTPStatus.BAD_REQUEST, - "Line too long") + HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, + "Line too long", + str(err)) return False except http.client.HTTPException as err: self.send_error( @@ -465,8 +461,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): # (see bug #1100201) content = (self.error_message_format % { 'code': code, - 'message': _quote_html(message), - 'explain': _quote_html(explain) + 'message': html.escape(message, quote=False), + 'explain': html.escape(explain, quote=False) }) body = content.encode('UTF-8', 'replace') self.send_header("Content-Type", self.error_content_type) @@ -491,12 +487,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): def send_response_only(self, code, message=None): """Send the response header only.""" - if message is None: - if code in self.responses: - message = self.responses[code][0] - else: - message = '' if self.request_version != 'HTTP/0.9': + if message is None: + if code in self.responses: + message = self.responses[code][0] + else: + message = '' if not hasattr(self, '_headers_buffer'): self._headers_buffer = [] self._headers_buffer.append(("%s %d %s\r\n" % @@ -583,12 +579,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): """Return the current date and time formatted for a message header.""" if timestamp is None: timestamp = time.time() - year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) - s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( - self.weekdayname[wd], - day, self.monthname[month], year, - hh, mm, ss) - return s + return email.utils.formatdate(timestamp, usegmt=True) def log_date_time_string(self): """Return the current time formatted for logging.""" @@ -726,7 +717,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): errors='surrogatepass') except UnicodeDecodeError: displaypath = urllib.parse.unquote(path) - displaypath = html.escape(displaypath) + displaypath = html.escape(displaypath, quote=False) enc = sys.getfilesystemencoding() title = 'Directory listing for %s' % displaypath r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' @@ -750,7 +741,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): r.append('<li><a href="%s">%s</a></li>' % (urllib.parse.quote(linkname, errors='surrogatepass'), - html.escape(displayname))) + html.escape(displayname, quote=False))) r.append('</ul>\n<hr>\n</body>\n</html>\n') encoded = '\n'.join(r).encode(enc, 'surrogateescape') f = io.BytesIO() @@ -1191,16 +1182,15 @@ def test(HandlerClass=BaseHTTPRequestHandler, server_address = (bind, port) HandlerClass.protocol_version = protocol - httpd = ServerClass(server_address, HandlerClass) - - sa = httpd.socket.getsockname() - print("Serving HTTP on", sa[0], "port", sa[1], "...") - try: - httpd.serve_forever() - except KeyboardInterrupt: - print("\nKeyboard interrupt received, exiting.") - httpd.server_close() - sys.exit(0) + with ServerClass(server_address, HandlerClass) as httpd: + sa = httpd.socket.getsockname() + serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..." + print(serve_message.format(host=sa[0], port=sa[1])) + try: + httpd.serve_forever() + except KeyboardInterrupt: + print("\nKeyboard interrupt received, exiting.") + sys.exit(0) if __name__ == '__main__': parser = argparse.ArgumentParser() |