summaryrefslogtreecommitdiff
path: root/paste/proxy.py
diff options
context:
space:
mode:
Diffstat (limited to 'paste/proxy.py')
-rw-r--r--paste/proxy.py289
1 files changed, 289 insertions, 0 deletions
diff --git a/paste/proxy.py b/paste/proxy.py
new file mode 100644
index 0000000..67d4b1b
--- /dev/null
+++ b/paste/proxy.py
@@ -0,0 +1,289 @@
+# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+"""
+An application that proxies WSGI requests to a remote server.
+
+TODO:
+
+* Send ``Via`` header? It's not clear to me this is a Via in the
+ style of a typical proxy.
+
+* Other headers or metadata? I put in X-Forwarded-For, but that's it.
+
+* Signed data of non-HTTP keys? This would be for things like
+ REMOTE_USER.
+
+* Something to indicate what the original URL was? The original host,
+ scheme, and base path.
+
+* Rewriting ``Location`` headers? mod_proxy does this.
+
+* Rewriting body? (Probably not on this one -- that can be done with
+ a different middleware that wraps this middleware)
+
+* Example::
+
+ use = egg:Paste#proxy
+ address = http://server3:8680/exist/rest/db/orgs/sch/config/
+ allowed_request_methods = GET
+
+"""
+
+from six.moves import http_client as httplib
+from six.moves.urllib import parse as urlparse
+from six.moves.urllib.parse import quote
+import six
+
+from paste import httpexceptions
+from paste.util.converters import aslist
+
+# Remove these headers from response (specify lower case header
+# names):
+filtered_headers = (
+ 'transfer-encoding',
+ 'connection',
+ 'keep-alive',
+ 'proxy-authenticate',
+ 'proxy-authorization',
+ 'te',
+ 'trailers',
+ 'upgrade',
+)
+
+class Proxy(object):
+
+ def __init__(self, address, allowed_request_methods=(),
+ suppress_http_headers=()):
+ self.address = address
+ self.parsed = urlparse.urlsplit(address)
+ self.scheme = self.parsed[0].lower()
+ self.host = self.parsed[1]
+ self.path = self.parsed[2]
+ self.allowed_request_methods = [
+ x.lower() for x in allowed_request_methods if x]
+
+ self.suppress_http_headers = [
+ x.lower() for x in suppress_http_headers if x]
+
+ def __call__(self, environ, start_response):
+ if (self.allowed_request_methods and
+ environ['REQUEST_METHOD'].lower() not in self.allowed_request_methods):
+ return httpexceptions.HTTPBadRequest("Disallowed")(environ, start_response)
+
+ if self.scheme == 'http':
+ ConnClass = httplib.HTTPConnection
+ elif self.scheme == 'https':
+ ConnClass = httplib.HTTPSConnection
+ else:
+ raise ValueError(
+ "Unknown scheme for %r: %r" % (self.address, self.scheme))
+ conn = ConnClass(self.host)
+ headers = {}
+ for key, value in environ.items():
+ if key.startswith('HTTP_'):
+ key = key[5:].lower().replace('_', '-')
+ if key == 'host' or key in self.suppress_http_headers:
+ continue
+ headers[key] = value
+ headers['host'] = self.host
+ if 'REMOTE_ADDR' in environ:
+ headers['x-forwarded-for'] = environ['REMOTE_ADDR']
+ if environ.get('CONTENT_TYPE'):
+ headers['content-type'] = environ['CONTENT_TYPE']
+ if environ.get('CONTENT_LENGTH'):
+ if environ['CONTENT_LENGTH'] == '-1':
+ # This is a special case, where the content length is basically undetermined
+ body = environ['wsgi.input'].read(-1)
+ headers['content-length'] = str(len(body))
+ else:
+ headers['content-length'] = environ['CONTENT_LENGTH']
+ length = int(environ['CONTENT_LENGTH'])
+ body = environ['wsgi.input'].read(length)
+ else:
+ body = ''
+
+ path_info = quote(environ['PATH_INFO'])
+ if self.path:
+ request_path = path_info
+ if request_path and request_path[0] == '/':
+ request_path = request_path[1:]
+
+ path = urlparse.urljoin(self.path, request_path)
+ else:
+ path = path_info
+ if environ.get('QUERY_STRING'):
+ path += '?' + environ['QUERY_STRING']
+
+ conn.request(environ['REQUEST_METHOD'],
+ path,
+ body, headers)
+ res = conn.getresponse()
+ headers_out = parse_headers(res.msg)
+
+ status = '%s %s' % (res.status, res.reason)
+ start_response(status, headers_out)
+ # @@: Default?
+ length = res.getheader('content-length')
+ if length is not None:
+ body = res.read(int(length))
+ else:
+ body = res.read()
+ conn.close()
+ return [body]
+
+def make_proxy(global_conf, address, allowed_request_methods="",
+ suppress_http_headers=""):
+ """
+ Make a WSGI application that proxies to another address:
+
+ ``address``
+ the full URL ending with a trailing ``/``
+
+ ``allowed_request_methods``:
+ a space seperated list of request methods (e.g., ``GET POST``)
+
+ ``suppress_http_headers``
+ a space seperated list of http headers (lower case, without
+ the leading ``http_``) that should not be passed on to target
+ host
+ """
+ allowed_request_methods = aslist(allowed_request_methods)
+ suppress_http_headers = aslist(suppress_http_headers)
+ return Proxy(
+ address,
+ allowed_request_methods=allowed_request_methods,
+ suppress_http_headers=suppress_http_headers)
+
+
+class TransparentProxy(object):
+
+ """
+ A proxy that sends the request just as it was given, including
+ respecting HTTP_HOST, wsgi.url_scheme, etc.
+
+ This is a way of translating WSGI requests directly to real HTTP
+ requests. All information goes in the environment; modify it to
+ modify the way the request is made.
+
+ If you specify ``force_host`` (and optionally ``force_scheme``)
+ then HTTP_HOST won't be used to determine where to connect to;
+ instead a specific host will be connected to, but the ``Host``
+ header in the request will remain intact.
+ """
+
+ def __init__(self, force_host=None,
+ force_scheme='http'):
+ self.force_host = force_host
+ self.force_scheme = force_scheme
+
+ def __repr__(self):
+ return '<%s %s force_host=%r force_scheme=%r>' % (
+ self.__class__.__name__,
+ hex(id(self)),
+ self.force_host, self.force_scheme)
+
+ def __call__(self, environ, start_response):
+ scheme = environ['wsgi.url_scheme']
+ if self.force_host is None:
+ conn_scheme = scheme
+ else:
+ conn_scheme = self.force_scheme
+ if conn_scheme == 'http':
+ ConnClass = httplib.HTTPConnection
+ elif conn_scheme == 'https':
+ ConnClass = httplib.HTTPSConnection
+ else:
+ raise ValueError(
+ "Unknown scheme %r" % scheme)
+ if 'HTTP_HOST' not in environ:
+ raise ValueError(
+ "WSGI environ must contain an HTTP_HOST key")
+ host = environ['HTTP_HOST']
+ if self.force_host is None:
+ conn_host = host
+ else:
+ conn_host = self.force_host
+ conn = ConnClass(conn_host)
+ headers = {}
+ for key, value in environ.items():
+ if key.startswith('HTTP_'):
+ key = key[5:].lower().replace('_', '-')
+ headers[key] = value
+ headers['host'] = host
+ if 'REMOTE_ADDR' in environ and 'HTTP_X_FORWARDED_FOR' not in environ:
+ headers['x-forwarded-for'] = environ['REMOTE_ADDR']
+ if environ.get('CONTENT_TYPE'):
+ headers['content-type'] = environ['CONTENT_TYPE']
+ if environ.get('CONTENT_LENGTH'):
+ length = int(environ['CONTENT_LENGTH'])
+ body = environ['wsgi.input'].read(length)
+ if length == -1:
+ environ['CONTENT_LENGTH'] = str(len(body))
+ elif 'CONTENT_LENGTH' not in environ:
+ body = ''
+ length = 0
+ else:
+ body = ''
+ length = 0
+
+ path = (environ.get('SCRIPT_NAME', '')
+ + environ.get('PATH_INFO', ''))
+ path = quote(path)
+ if 'QUERY_STRING' in environ:
+ path += '?' + environ['QUERY_STRING']
+ conn.request(environ['REQUEST_METHOD'],
+ path, body, headers)
+ res = conn.getresponse()
+ headers_out = parse_headers(res.msg)
+
+ status = '%s %s' % (res.status, res.reason)
+ start_response(status, headers_out)
+ # @@: Default?
+ length = res.getheader('content-length')
+ if length is not None:
+ body = res.read(int(length))
+ else:
+ body = res.read()
+ conn.close()
+ return [body]
+
+def parse_headers(message):
+ """
+ Turn a Message object into a list of WSGI-style headers.
+ """
+ headers_out = []
+ if six.PY3:
+ for header, value in message.items():
+ if header.lower() not in filtered_headers:
+ headers_out.append((header, value))
+ else:
+ for full_header in message.headers:
+ if not full_header:
+ # Shouldn't happen, but we'll just ignore
+ continue
+ if full_header[0].isspace():
+ # Continuation line, add to the last header
+ if not headers_out:
+ raise ValueError(
+ "First header starts with a space (%r)" % full_header)
+ last_header, last_value = headers_out.pop()
+ value = last_value + ' ' + full_header.strip()
+ headers_out.append((last_header, value))
+ continue
+ try:
+ header, value = full_header.split(':', 1)
+ except:
+ raise ValueError("Invalid header: %r" % full_header)
+ value = value.strip()
+ if header.lower() not in filtered_headers:
+ headers_out.append((header, value))
+ return headers_out
+
+def make_transparent_proxy(
+ global_conf, force_host=None, force_scheme='http'):
+ """
+ Create a proxy that connects to a specific host, but does
+ absolutely no other filtering, including the Host header.
+ """
+ return TransparentProxy(force_host=force_host,
+ force_scheme=force_scheme)