summaryrefslogtreecommitdiff
path: root/paste/gzipper.py
diff options
context:
space:
mode:
authorMarc Abramowitz <marc@marc-abramowitz.com>2015-04-30 17:39:24 -0700
committerMarc Abramowitz <marc@marc-abramowitz.com>2015-04-30 17:39:24 -0700
commitfa100c92c06d3a8a61a0dda1a2e06018437b09c6 (patch)
treea1cc50f93fbf257685c3849e03496c5e33949281 /paste/gzipper.py
downloadpaste-git-fa100c92c06d3a8a61a0dda1a2e06018437b09c6.tar.gz
test_wsgirequest_charset: Use UTF-8 instead of iso-8859-1
test_wsgirequest_charset_use_UTF-8_instead_of_iso-8859-1
because it seems that the de facto standard for encoding URIs is to use UTF-8.

I've been reading about URL encoding and it seems like perhaps using an encoding other than UTF-8 is very non-standard and not well-supported (this test is trying to use `iso-8859-1`).

From http://en.wikipedia.org/wiki/Percent-encoding:

> For a non-ASCII character, it is typically converted to its byte sequence in
> UTF-8, and then each byte value is represented as above.
>
> The generic URI syntax mandates that new URI schemes that provide for the
> representation of character data in a URI must, in effect, represent
> characters from the unreserved set without translation, and should convert
> all other characters to bytes according to UTF-8, and then percent-encode
> those values. This requirement was introduced in January 2005 with the
> publication of RFC 3986

From http://tools.ietf.org/html/rfc3986:

> Non-ASCII characters must first be encoded according to UTF-8 [STD63], and
> then each octet of the corresponding UTF-8 sequence must be percent-encoded
> to be represented as URI characters. URI producing applications must not use
> percent-encoding in host unless it is used to represent a UTF-8 character
> sequence.

From http://tools.ietf.org/html/rfc3987:

> Conversions from URIs to IRIs MUST NOT use any character encoding other than
> UTF-8 in steps 3 and 4, even if it might be possible to guess from the
> context that another character encoding than UTF-8 was used in the URI. For
> example, the URI "http://www.example.org/r%E9sum%E9.html" might with some
> guessing be interpreted to contain two e-acute characters encoded as
> iso-8859-1. It must not be converted to an IRI containing these e-acute
> characters. Otherwise, in the future the IRI will be mapped to
> "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different URI from
> "http://www.example.org/r%E9sum%E9.html".

See issue #7, which I think this at least partially fixes.
Diffstat (limited to 'paste/gzipper.py')
-rw-r--r--paste/gzipper.py107
1 files changed, 107 insertions, 0 deletions
diff --git a/paste/gzipper.py b/paste/gzipper.py
new file mode 100644
index 0000000..eca8775
--- /dev/null
+++ b/paste/gzipper.py
@@ -0,0 +1,107 @@
+# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+
+# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+
+"""
+WSGI middleware
+
+Gzip-encodes the response.
+"""
+
+import gzip
+from paste.response import header_value, remove_header
+from paste.httpheaders import CONTENT_LENGTH
+import six
+
+class GzipOutput(object):
+    """Empty placeholder class; it defines no attributes or methods and is
+    not referenced anywhere else in this file."""
+
+class middleware(object):
+    """
+    WSGI middleware that hands the response to ``GzipResponse`` for
+    buffering (and gzip-encoding where eligible) when the client's
+    ``Accept-Encoding`` request header allows gzip.
+
+    ``application`` is the wrapped WSGI application and ``compress_level``
+    is the gzip compression level (coerced to ``int``).
+    """
+
+    def __init__(self, application, compress_level=6):
+        self.application = application
+        self.compress_level = int(compress_level)
+
+    @staticmethod
+    def _accepts_gzip(accept_encoding):
+        """
+        Return True when the ``Accept-Encoding`` header value lists
+        ``gzip`` (or its alias ``x-gzip``) with a non-zero quality value.
+
+        Coding names are compared case-insensitively.  A plain substring
+        test is not enough: ``gzip;q=0`` means the client explicitly
+        refuses gzip.
+        """
+        for item in accept_encoding.split(','):
+            coding, _, params = item.partition(';')
+            if coding.strip().lower() not in ('gzip', 'x-gzip'):
+                continue
+            qvalue = 1.0
+            for param in params.split(';'):
+                key, _, value = param.partition('=')
+                if key.strip().lower() != 'q':
+                    continue
+                try:
+                    qvalue = float(value.strip())
+                except ValueError:
+                    # Malformed weight: be lenient and keep the default,
+                    # i.e. treat the coding as acceptable.
+                    continue
+            if qvalue > 0:
+                return True
+        return False
+
+    def __call__(self, environ, start_response):
+        """
+        Call the wrapped application; return its response unchanged when
+        the client does not accept gzip, otherwise return the body
+        buffered by ``GzipResponse`` as a one-element list.
+        """
+        if not self._accepts_gzip(environ.get('HTTP_ACCEPT_ENCODING', '')):
+            # nothing for us to do, so this middleware will
+            # be a no-op:
+            return self.application(environ, start_response)
+        response = GzipResponse(start_response, self.compress_level)
+        app_iter = self.application(environ,
+                                    response.gzip_start_response)
+        if app_iter is not None:
+            response.finish_response(app_iter)
+
+        return response.write()
+
+class GzipResponse(object):
+    """
+    Buffers a WSGI response in memory, gzip-compressing the body when the
+    response headers make it eligible, and only then calls the real
+    ``start_response`` with a corrected ``Content-Length``.
+
+    Usage (as done by the middleware in this file): pass
+    ``gzip_start_response`` to the application as its ``start_response``,
+    feed the returned iterable to ``finish_response``, then call ``write``
+    to obtain the final body.
+    """
+
+    def __init__(self, start_response, compress_level):
+        self.start_response = start_response
+        self.compress_level = compress_level
+        self.buffer = six.BytesIO()
+        self.compressible = False
+        self.content_length = None
+        self.exc_info = None
+        # Sink for body data; chosen lazily on the first write so that the
+        # decision is made only after the application has supplied headers.
+        self._output = None
+
+    def gzip_start_response(self, status, headers, exc_info=None):
+        """
+        ``start_response`` stand-in: record status/headers, decide whether
+        the body is compressible and return a ``write`` callable.
+
+        The body is compressed when the content type starts with ``text/``
+        or ``application/`` (compared case-insensitively), does not contain
+        ``zip``, and no ``Content-Encoding`` is already set.  ``headers``
+        is modified in place.
+        """
+        ct = (header_value(headers, 'content-type') or '').lower()
+        ce = header_value(headers, 'content-encoding')
+        self.compressible = bool(
+            ct.startswith(('text/', 'application/'))
+            and 'zip' not in ct
+            and not ce)
+        if self.compressible:
+            headers.append(('content-encoding', 'gzip'))
+        # The length is recomputed from the buffer in finish_response.
+        remove_header(headers, 'content-length')
+        self.headers = headers
+        self.status = status
+        # Kept so it can be forwarded to the real start_response.
+        self.exc_info = exc_info
+        # Route the write callable through the same sink as the iterable,
+        # otherwise data written with it would land uncompressed in a
+        # body that is labelled as gzip.
+        return self._write
+
+    def _get_output(self):
+        """Return the body sink, creating it on first use."""
+        if self._output is None:
+            if self.compressible:
+                self._output = gzip.GzipFile(
+                    mode='wb', compresslevel=self.compress_level,
+                    fileobj=self.buffer)
+            else:
+                self._output = self.buffer
+        return self._output
+
+    def _write(self, data):
+        """Append ``data`` (bytes) to the response body."""
+        self._get_output().write(data)
+
+    def write(self):
+        """
+        Return the buffered body as a one-element list and close the
+        buffer; call this once, after ``finish_response``.
+        """
+        body = self.buffer.getvalue()
+        self.buffer.close()
+        return [body]
+
+    def finish_response(self, app_iter):
+        """
+        Drain ``app_iter`` into the buffer, close it if it has ``close``,
+        set ``Content-Length`` and call the real ``start_response``.
+        """
+        try:
+            for chunk in app_iter:
+                self._write(chunk)
+            output = self._get_output()
+            if output is not self.buffer:
+                # Closing the GzipFile flushes the gzip trailer into the
+                # buffer; it does not close the underlying buffer.
+                output.close()
+        finally:
+            if hasattr(app_iter, 'close'):
+                app_iter.close()
+        content_length = self.buffer.tell()
+        self.content_length = content_length
+        CONTENT_LENGTH.update(self.headers, content_length)
+        if self.exc_info is not None:
+            self.start_response(self.status, self.headers, self.exc_info)
+        else:
+            self.start_response(self.status, self.headers)
+
+def filter_factory(application, **conf):
+    """
+    Deprecated: use ``make_gzip_middleware`` instead.
+
+    Emits a ``DeprecationWarning`` and returns a one-argument callable that
+    wraps a WSGI application in ``middleware``.  The first positional
+    argument is accepted but not used.  A ``compress_level`` entry in
+    ``conf`` is honoured (default 6); other entries are ignored.
+    """
+    import warnings
+    warnings.warn(
+        'This function is deprecated; use make_gzip_middleware instead',
+        DeprecationWarning, 2)
+    compress_level = int(conf.get('compress_level', 6))
+
+    # Named so as not to shadow the builtin ``filter``.
+    def gzip_filter(application):
+        return middleware(application, compress_level=compress_level)
+    return gzip_filter
+
+def make_gzip_middleware(app, global_conf, compress_level=6):
+    """
+    Build the gzip ``middleware`` around ``app``.
+
+    The response is gzipped when the browser supports it and the content
+    is of type ``text/*`` or ``application/*``.  ``global_conf`` is
+    accepted but not used; ``compress_level`` is coerced to ``int``
+    before being passed on.
+    """
+    level = int(compress_level)
+    wrapped = middleware(app, compress_level=level)
+    return wrapped