diff options
author | Robert Brewer <fumanchu@aminus.org> | 2006-06-27 17:11:19 +0000 |
---|---|---|
committer | Robert Brewer <fumanchu@aminus.org> | 2006-06-27 17:11:19 +0000 |
commit | a05e92bff09c989a0c52f4554d90d7541f32e186 (patch) | |
tree | 14a5ca3d10bd26da74ca3d52b2acd5a717394133 /cherrypy/lib/encoding.py | |
parent | 2ef575bc240b40f7cc37d1b56716ee70bcb9c8dc (diff) | |
download | cherrypy-git-a05e92bff09c989a0c52f4554d90d7541f32e186.tar.gz |
Renamed lib.encodings to lib.encoding to avoid shadowing the builtin.
Diffstat (limited to 'cherrypy/lib/encoding.py')
-rw-r--r-- | cherrypy/lib/encoding.py | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/cherrypy/lib/encoding.py b/cherrypy/lib/encoding.py new file mode 100644 index 00000000..5496936d --- /dev/null +++ b/cherrypy/lib/encoding.py @@ -0,0 +1,220 @@ +import struct +import time + +import cherrypy + + +def decode(encoding=None, default_encoding='utf-8'): + """Decode cherrypy.request.params .""" + if not encoding: + ct = cherrypy.request.headers.elements("Content-Type") + if ct: + ct = ct[0] + encoding = ct.params.get("charset", None) + if (not encoding) and ct.value.lower().startswith("text/"): + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + # When no explicit charset parameter is provided by the + # sender, media subtypes of the "text" type are defined + # to have a default charset value of "ISO-8859-1" when + # received via HTTP. + encoding = "ISO-8859-1" + + if not encoding: + encoding = default_encoding + + try: + decode_params(encoding) + except UnicodeDecodeError: + # IE and Firefox don't supply a charset when submitting form + # params with a CT of application/x-www-form-urlencoded. + # So after all our guessing, it could *still* be wrong. + # Start over with ISO-8859-1, since that seems to be preferred. + decode_params("ISO-8859-1") + +def decode_params(encoding): + decodedParams = {} + for key, value in cherrypy.request.params.items(): + if hasattr(value, 'file'): + # This is a file being uploaded: skip it + decodedParams[key] = value + elif isinstance(value, list): + # value is a list: decode each element + decodedParams[key] = [v.decode(encoding) for v in value] + else: + # value is a regular string: decode it + decodedParams[key] = value.decode(encoding) + + # Decode all or nothing, so we can try again on error. + cherrypy.request.params = decodedParams + + +# Encoding + +def encode(encoding=None, errors='strict'): + ct = cherrypy.response.headers.elements("Content-Type") + if ct: + ct = ct[0] + if ct.value.lower().startswith("text/"): + # Set "charset=..." param on response Content-Type header + ct.params['charset'] = find_acceptable_charset(encoding, errors=errors) + cherrypy.response.headers["Content-Type"] = str(ct) + +def encode_stream(encoding, errors='strict'): + """Encode a streaming response body. + + Use a generator wrapper, and just pray it works as the stream is + being written out. + """ + def encoder(body): + for line in body: + yield line.encode(encoding, errors) + cherrypy.response.body = encoder(cherrypy.response.body) + return True + +def encode_string(encoding, errors='strict'): + """Encode a buffered response body.""" + try: + body = [] + for chunk in cherrypy.response.body: + body.append(chunk.encode(encoding, errors)) + cherrypy.response.body = body + except UnicodeError: + return False + else: + return True + +def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'): + response = cherrypy.response + + attempted_charsets = [] + + stream = cherrypy.config.get("stream_response", False) + if stream: + encoder = encode_stream + else: + response.collapse_body() + encoder = encode_string + + failmsg = "The response could not be encoded with %s" + + if encoding is not None: + # If specified, force this encoding to be used, or fail. + if encoder(encoding, errors): + return encoding + else: + raise cherrypy.HTTPError(500, failmsg % encoding) + + # Parse the Accept_Charset request header, and try to provide one + # of the requested charsets (in order of user preference). + encs = cherrypy.request.headers.elements('Accept-Charset') + if not encs: + # Any character-set is acceptable. + charsets = [] + if encoder(default_encoding, errors): + return default_encoding + else: + raise cherrypy.HTTPError(500, failmsg % default_encoding) + else: + charsets = [enc.value.lower() for enc in encs] + if "*" not in charsets: + # If no "*" is present in an Accept-Charset field, then all + # character sets not explicitly mentioned get a quality + # value of 0, except for ISO-8859-1, which gets a quality + # value of 1 if not explicitly mentioned. + iso = 'iso-8859-1' + if iso not in charsets: + attempted_charsets.append(iso) + if encoder(iso, errors): + return iso + + for element in encs: + if element.qvalue > 0: + if element.value == "*": + # Matches any charset. Try our default. + if default_encoding not in attempted_charsets: + attempted_charsets.append(default_encoding) + if encoder(default_encoding, errors): + return default_encoding + else: + encoding = element.value + if encoding not in attempted_charsets: + attempted_charsets.append(encoding) + if encoder(encoding, errors): + return encoding + + # No suitable encoding found. + ac = cherrypy.request.headers.get('Accept-Charset') + if ac is None: + msg = "Your client did not send an Accept-Charset header." + else: + msg = "Your client sent this Accept-Charset header: %s." % ac + msg += " We tried these charsets: %s." % ", ".join(attempted_charsets) + raise cherrypy.HTTPError(406, msg) + + +# GZIP + +def compress(body, compress_level): + """Compress 'body' at the given compress_level.""" + import zlib + + yield '\037\213' # magic header + yield '\010' # compression method + yield '\0' + yield struct.pack("<L", long(time.time())) + yield '\002' + yield '\377' + + crc = zlib.crc32("") + size = 0 + zobj = zlib.compressobj(compress_level, + zlib.DEFLATED, -zlib.MAX_WBITS, + zlib.DEF_MEM_LEVEL, 0) + for line in body: + size += len(line) + crc = zlib.crc32(line, crc) + yield zobj.compress(line) + yield zobj.flush() + yield struct.pack("<l", crc) + yield struct.pack("<L", size & 0xFFFFFFFFL) + +def gzip(compress_level=9, mime_types=['text/html', 'text/plain']): + response = cherrypy.response + if not response.body: + # Response body is empty (might be a 304 for instance) + return + + def zipit(): + # Return a generator that compresses the page + varies = response.headers.get("Vary", "") + varies = [x.strip() for x in varies.split(",") if x.strip()] + if "Accept-Encoding" not in varies: + varies.append("Accept-Encoding") + response.headers['Vary'] = ", ".join(varies) + + response.headers['Content-Encoding'] = 'gzip' + response.body = compress(response.body, compress_level) + + acceptable = cherrypy.request.headers.elements('Accept-Encoding') + if not acceptable: + # If no Accept-Encoding field is present in a request, + # the server MAY assume that the client will accept any + # content coding. In this case, if "identity" is one of + # the available content-codings, then the server SHOULD use + # the "identity" content-coding, unless it has additional + # information that a different content-coding is meaningful + # to the client. + return + + ct = response.headers.get('Content-Type').split(';')[0] + for coding in acceptable: + if coding.value == 'identity' and coding.qvalue != 0: + return + if coding.value in ('gzip', 'x-gzip'): + if coding.qvalue == 0: + return + if ct in mime_types: + zipit() + return + cherrypy.HTTPError(406, "identity, gzip").set_response() + |