diff options
author | Marc Abramowitz <marc@marc-abramowitz.com> | 2015-04-30 17:39:24 -0700 |
---|---|---|
committer | Marc Abramowitz <marc@marc-abramowitz.com> | 2015-04-30 17:39:24 -0700 |
commit | fa100c92c06d3a8a61a0dda1a2e06018437b09c6 (patch) | |
tree | a1cc50f93fbf257685c3849e03496c5e33949281 /paste/gzipper.py | |
download | paste-git-fa100c92c06d3a8a61a0dda1a2e06018437b09c6.tar.gz |
test_wsgirequest_charset: Use UTF-8 instead of iso-8859-1test_wsgirequest_charset_use_UTF-8_instead_of_iso-8859-1
because it seems that the defacto standard for encoding URIs is to use UTF-8.
I've been reading about url encoding and it seems like perhaps using an
encoding other than UTF-8 is very non-standard and not well-supported (this
test is trying to use `iso-8859-1`).
From http://en.wikipedia.org/wiki/Percent-encoding
> For a non-ASCII character, it is typically converted to its byte sequence in
> UTF-8, and then each byte value is represented as above.
> The generic URI syntax mandates that new URI schemes that provide for the
> representation of character data in a URI must, in effect, represent
> characters from the unreserved set without translation, and should convert
> all other characters to bytes according to UTF-8, and then percent-encode
> those values. This requirement was introduced in January 2005 with the
> publication of RFC 3986
From http://tools.ietf.org/html/rfc3986:
> Non-ASCII characters must first be encoded according to UTF-8 [STD63], and
> then each octet of the corresponding UTF-8 sequence must be percent-encoded
> to be represented as URI characters. URI producing applications must not use
> percent-encoding in host unless it is used to represent a UTF-8 character
> sequence.
From http://tools.ietf.org/html/rfc3987:
> Conversions from URIs to IRIs MUST NOT use any character encoding other than
> UTF-8 in steps 3 and 4, even if it might be possible to guess from the
> context that another character encoding than UTF-8 was used in the URI. For
> example, the URI "http://www.example.org/r%E9sum%E9.html" might with some
> guessing be interpreted to contain two e-acute characters encoded as
> iso-8859-1. It must not be converted to an IRI containing these e-acute
> characters. Otherwise, in the future the IRI will be mapped to
> "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different URI from
> "http://www.example.org/r%E9sum%E9.html".
See issue #7, which I think this at least partially fixes.
Diffstat (limited to 'paste/gzipper.py')
-rw-r--r-- | paste/gzipper.py | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/paste/gzipper.py b/paste/gzipper.py new file mode 100644 index 0000000..eca8775 --- /dev/null +++ b/paste/gzipper.py @@ -0,0 +1,107 @@ +# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) +# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php + +# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) +# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php + +""" +WSGI middleware + +Gzip-encodes the response. +""" + +import gzip +from paste.response import header_value, remove_header +from paste.httpheaders import CONTENT_LENGTH +import six + +class GzipOutput(object): + pass + +class middleware(object): + + def __init__(self, application, compress_level=6): + self.application = application + self.compress_level = int(compress_level) + + def __call__(self, environ, start_response): + if 'gzip' not in environ.get('HTTP_ACCEPT_ENCODING', ''): + # nothing for us to do, so this middleware will + # be a no-op: + return self.application(environ, start_response) + response = GzipResponse(start_response, self.compress_level) + app_iter = self.application(environ, + response.gzip_start_response) + if app_iter is not None: + response.finish_response(app_iter) + + return response.write() + +class GzipResponse(object): + + def __init__(self, start_response, compress_level): + self.start_response = start_response + self.compress_level = compress_level + self.buffer = six.BytesIO() + self.compressible = False + self.content_length = None + + def gzip_start_response(self, status, headers, exc_info=None): + self.headers = headers + ct = header_value(headers,'content-type') + ce = header_value(headers,'content-encoding') + self.compressible = False + if ct and (ct.startswith('text/') or ct.startswith('application/')) \ + and 'zip' not in ct: + self.compressible = True + if ce: + self.compressible = False + if self.compressible: + headers.append(('content-encoding', 'gzip')) + remove_header(headers, 'content-length') + self.headers = headers + self.status = status + return self.buffer.write + + def write(self): + out = self.buffer + out.seek(0) + s = out.getvalue() + out.close() + return [s] + + def finish_response(self, app_iter): + if self.compressible: + output = gzip.GzipFile(mode='wb', compresslevel=self.compress_level, + fileobj=self.buffer) + else: + output = self.buffer + try: + for s in app_iter: + output.write(s) + if self.compressible: + output.close() + finally: + if hasattr(app_iter, 'close'): + app_iter.close() + content_length = self.buffer.tell() + CONTENT_LENGTH.update(self.headers, content_length) + self.start_response(self.status, self.headers) + +def filter_factory(application, **conf): + import warnings + warnings.warn( + 'This function is deprecated; use make_gzip_middleware instead', + DeprecationWarning, 2) + def filter(application): + return middleware(application) + return filter + +def make_gzip_middleware(app, global_conf, compress_level=6): + """ + Wrap the middleware, so that it applies gzipping to a response + when it is supported by the browser and the content is of + type ``text/*`` or ``application/*`` + """ + compress_level = int(compress_level) + return middleware(app, compress_level=compress_level) |