summaryrefslogtreecommitdiff
path: root/swift/common/http_protocol.py
diff options
context:
space:
mode:
Diffstat (limited to 'swift/common/http_protocol.py')
-rw-r--r--swift/common/http_protocol.py246
1 files changed, 246 insertions, 0 deletions
diff --git a/swift/common/http_protocol.py b/swift/common/http_protocol.py
new file mode 100644
index 000000000..59d7767de
--- /dev/null
+++ b/swift/common/http_protocol.py
@@ -0,0 +1,246 @@
+# Copyright (c) 2010-2022 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from eventlet import wsgi, websocket
+import six
+
+from swift.common.swob import wsgi_quote, wsgi_unquote, \
+ wsgi_quote_plus, wsgi_unquote_plus, wsgi_to_bytes, bytes_to_wsgi
+
+
+class SwiftHttpProtocol(wsgi.HttpProtocol):
+ default_request_version = "HTTP/1.0"
+
+ def __init__(self, *args, **kwargs):
+ # See https://github.com/eventlet/eventlet/pull/590
+ self.pre_shutdown_bugfix_eventlet = not getattr(
+ websocket.WebSocketWSGI, '_WSGI_APP_ALWAYS_IDLE', None)
+ # Note this is not a new-style class, so super() won't work
+ wsgi.HttpProtocol.__init__(self, *args, **kwargs)
+
+ def log_request(self, *a):
+ """
+ Turn off logging requests by the underlying WSGI software.
+ """
+ pass
+
+ def log_message(self, f, *a):
+ """
+ Redirect logging other messages by the underlying WSGI software.
+ """
+ logger = getattr(self.server.app, 'logger', None)
+ if logger:
+ logger.error('ERROR WSGI: ' + f, *a)
+ else:
+ # eventlet<=0.17.4 doesn't have an error method, and in newer
+ # versions the output from error is same as info anyway
+ self.server.log.info('ERROR WSGI: ' + f, *a)
+
+ class MessageClass(wsgi.HttpProtocol.MessageClass):
+ '''Subclass to see when the client didn't provide a Content-Type'''
+ # for py2:
+ def parsetype(self):
+ if self.typeheader is None:
+ self.typeheader = ''
+ wsgi.HttpProtocol.MessageClass.parsetype(self)
+
+ # for py3:
+ def get_default_type(self):
+ '''If the client didn't provide a content type, leave it blank.'''
+ return ''
+
+ def parse_request(self):
+ # Need to track the bytes-on-the-wire for S3 signatures -- eventlet
+ # would do it for us, but since we rewrite the path on py3, we need to
+ # fix it ourselves later.
+ self.__raw_path_info = None
+
+ if not six.PY2:
+ # request lines *should* be ascii per the RFC, but historically
+ # we've allowed (and even have func tests that use) arbitrary
+ # bytes. This breaks on py3 (see https://bugs.python.org/issue33973
+ # ) but the work-around is simple: munge the request line to be
+ # properly quoted.
+ if self.raw_requestline.count(b' ') >= 2:
+ parts = self.raw_requestline.split(b' ', 2)
+ path, q, query = parts[1].partition(b'?')
+ self.__raw_path_info = path
+ # unquote first, so we don't over-quote something
+ # that was *correctly* quoted
+ path = wsgi_to_bytes(wsgi_quote(wsgi_unquote(
+ bytes_to_wsgi(path))))
+ query = b'&'.join(
+ sep.join([
+ wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus(
+ bytes_to_wsgi(key)))),
+ wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus(
+ bytes_to_wsgi(val))))
+ ])
+ for part in query.split(b'&')
+ for key, sep, val in (part.partition(b'='), ))
+ parts[1] = path + q + query
+ self.raw_requestline = b' '.join(parts)
+ # else, mangled protocol, most likely; let base class deal with it
+ return wsgi.HttpProtocol.parse_request(self)
+
+ if not six.PY2:
+ def get_environ(self, *args, **kwargs):
+ environ = wsgi.HttpProtocol.get_environ(self, *args, **kwargs)
+ environ['RAW_PATH_INFO'] = bytes_to_wsgi(
+ self.__raw_path_info)
+ header_payload = self.headers.get_payload()
+ if isinstance(header_payload, list) and len(header_payload) == 1:
+ header_payload = header_payload[0].get_payload()
+ if header_payload:
+ # This shouldn't be here. We must've bumped up against
+ # https://bugs.python.org/issue37093
+ headers_raw = list(environ['headers_raw'])
+ for line in header_payload.rstrip('\r\n').split('\n'):
+ if ':' not in line or line[:1] in ' \t':
+ # Well, we're no more broken than we were before...
+ # Should we support line folding?
+ # Should we 400 a bad header line?
+ break
+ header, value = line.split(':', 1)
+ value = value.strip(' \t\n\r')
+ # NB: Eventlet looks at the headers obj to figure out
+ # whether the client said the connection should close;
+ # see https://github.com/eventlet/eventlet/blob/v0.25.0/
+ # eventlet/wsgi.py#L504
+ self.headers.add_header(header, value)
+ headers_raw.append((header, value))
+ wsgi_key = 'HTTP_' + header.replace('-', '_').encode(
+ 'latin1').upper().decode('latin1')
+ if wsgi_key in ('HTTP_CONTENT_LENGTH',
+ 'HTTP_CONTENT_TYPE'):
+ wsgi_key = wsgi_key[5:]
+ environ[wsgi_key] = value
+ environ['headers_raw'] = tuple(headers_raw)
+ # Since we parsed some more headers, check to see if they
+ # change how our wsgi.input should behave
+ te = environ.get('HTTP_TRANSFER_ENCODING', '').lower()
+ if te.rsplit(',', 1)[-1].strip() == 'chunked':
+ environ['wsgi.input'].chunked_input = True
+ else:
+ length = environ.get('CONTENT_LENGTH')
+ if length:
+ length = int(length)
+ environ['wsgi.input'].content_length = length
+ if environ.get('HTTP_EXPECT', '').lower() == '100-continue':
+ environ['wsgi.input'].wfile = self.wfile
+ environ['wsgi.input'].wfile_line = \
+ b'HTTP/1.1 100 Continue\r\n'
+ return environ
+
+ def _read_request_line(self):
+ # Note this is not a new-style class, so super() won't work
+ got = wsgi.HttpProtocol._read_request_line(self)
+ # See https://github.com/eventlet/eventlet/pull/590
+ if self.pre_shutdown_bugfix_eventlet:
+ self.conn_state[2] = wsgi.STATE_REQUEST
+ return got
+
+ def handle_one_request(self):
+ # Note this is not a new-style class, so super() won't work
+ got = wsgi.HttpProtocol.handle_one_request(self)
+ # See https://github.com/eventlet/eventlet/pull/590
+ if self.pre_shutdown_bugfix_eventlet:
+ if self.conn_state[2] != wsgi.STATE_CLOSE:
+ self.conn_state[2] = wsgi.STATE_IDLE
+ return got
+
+
+class SwiftHttpProxiedProtocol(SwiftHttpProtocol):
+ """
+ Protocol object that speaks HTTP, including multiple requests, but with
+ a single PROXY line as the very first thing coming in over the socket.
+ This is so we can learn what the client's IP address is when Swift is
+ behind a TLS terminator, like hitch, that does not understand HTTP and
+ so cannot add X-Forwarded-For or other similar headers.
+
+ See http://www.haproxy.org/download/1.7/doc/proxy-protocol.txt for
+ protocol details.
+ """
+ def __init__(self, *a, **kw):
+ self.proxy_address = None
+ SwiftHttpProtocol.__init__(self, *a, **kw)
+
+ def handle_error(self, connection_line):
+ if not six.PY2:
+ connection_line = connection_line.decode('latin-1')
+
+ # No further processing will proceed on this connection under any
+ # circumstances. We always send the request into the superclass to
+ # handle any cleanup - this ensures that the request will not be
+ # processed.
+ self.rfile.close()
+ # We don't really have any confidence that an HTTP Error will be
+ # processable by the client as our transmission broken down between
+ # ourselves and our gateway proxy before processing the client
+ # protocol request. Hopefully the operator will know what to do!
+ msg = 'Invalid PROXY line %r' % connection_line
+ self.log_message(msg)
+ # Even assuming HTTP we don't even known what version of HTTP the
+ # client is sending? This entire endeavor seems questionable.
+ self.request_version = self.default_request_version
+ # appease http.server
+ self.command = 'PROXY'
+ self.send_error(400, msg)
+
+ def handle(self):
+ """Handle multiple requests if necessary."""
+ # ensure the opening line for the connection is a valid PROXY protcol
+ # line; this is the only IO we do on this connection before any
+ # additional wrapping further pollutes the raw socket.
+ connection_line = self.rfile.readline(self.server.url_length_limit)
+
+ if not connection_line.startswith(b'PROXY '):
+ return self.handle_error(connection_line)
+
+ proxy_parts = connection_line.strip(b'\r\n').split(b' ')
+ if proxy_parts[1].startswith(b'UNKNOWN'):
+ # "UNKNOWN", in PROXY protocol version 1, means "not
+ # TCP4 or TCP6". This includes completely legitimate
+ # things like QUIC or Unix domain sockets. The PROXY
+ # protocol (section 2.1) states that the receiver
+ # (that's us) MUST ignore anything after "UNKNOWN" and
+ # before the CRLF, essentially discarding the first
+ # line.
+ pass
+ elif proxy_parts[1] in (b'TCP4', b'TCP6') and len(proxy_parts) == 6:
+ if six.PY2:
+ self.client_address = (proxy_parts[2], proxy_parts[4])
+ self.proxy_address = (proxy_parts[3], proxy_parts[5])
+ else:
+ self.client_address = (
+ proxy_parts[2].decode('latin-1'),
+ proxy_parts[4].decode('latin-1'))
+ self.proxy_address = (
+ proxy_parts[3].decode('latin-1'),
+ proxy_parts[5].decode('latin-1'))
+ else:
+ self.handle_error(connection_line)
+
+ return SwiftHttpProtocol.handle(self)
+
+ def get_environ(self, *args, **kwargs):
+ environ = SwiftHttpProtocol.get_environ(self, *args, **kwargs)
+ if self.proxy_address:
+ environ['SERVER_ADDR'] = self.proxy_address[0]
+ environ['SERVER_PORT'] = self.proxy_address[1]
+ if self.proxy_address[1] == '443':
+ environ['wsgi.url_scheme'] = 'https'
+ environ['HTTPS'] = 'on'
+ return environ