test_wsgirequest_charset: Use UTF-8 instead of iso-8859-1 [test_wsgirequest_charset_use_UTF-8_instead_of_iso-8859-1]

because it seems that the de facto standard for encoding URIs is to use UTF-8.

I've been reading about URL encoding and it seems like perhaps using an encoding other than UTF-8 is very non-standard and not well-supported (this test is trying to use `iso-8859-1`).

From http://en.wikipedia.org/wiki/Percent-encoding:

> For a non-ASCII character, it is typically converted to its byte sequence in
> UTF-8, and then each byte value is represented as above.

> The generic URI syntax mandates that new URI schemes that provide for the
> representation of character data in a URI must, in effect, represent
> characters from the unreserved set without translation, and should convert
> all other characters to bytes according to UTF-8, and then percent-encode
> those values. This requirement was introduced in January 2005 with the
> publication of RFC 3986

From http://tools.ietf.org/html/rfc3986:

> Non-ASCII characters must first be encoded according to UTF-8 [STD63], and
> then each octet of the corresponding UTF-8 sequence must be percent-encoded
> to be represented as URI characters. URI producing applications must not use
> percent-encoding in host unless it is used to represent a UTF-8 character
> sequence.

From http://tools.ietf.org/html/rfc3987:

> Conversions from URIs to IRIs MUST NOT use any character encoding other than
> UTF-8 in steps 3 and 4, even if it might be possible to guess from the
> context that another character encoding than UTF-8 was used in the URI. For
> example, the URI "http://www.example.org/r%E9sum%E9.html" might with some
> guessing be interpreted to contain two e-acute characters encoded as
> iso-8859-1. It must not be converted to an IRI containing these e-acute
> characters. Otherwise, in the future the IRI will be mapped to
> "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different URI from
> "http://www.example.org/r%E9sum%E9.html".

See issue #7, which I think this at least partially fixes.
author: Marc Abramowitz <marc@marc-abramowitz.com> 2015-04-30 17:39:24 -0700
committer: Marc Abramowitz <marc@marc-abramowitz.com> 2015-04-30 17:39:24 -0700
commit: fa100c92c06d3a8a61a0dda1a2e06018437b09c6 (patch)
tree: a1cc50f93fbf257685c3849e03496c5e33949281 /tests/test_wsgiwrappers.py
download: paste-git-fa100c92c06d3a8a61a0dda1a2e06018437b09c6.tar.gz
1 files changed, 146 insertions, 0 deletions
diff --git a/tests/test_wsgiwrappers.py b/tests/test_wsgiwrappers.py
new file mode 100644
index 0000000..8719693
--- /dev/null
+++ b/tests/test_wsgiwrappers.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+# (c) 2007 Philip Jenvey; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+import cgi
+from paste.fixture import TestApp
+from paste.wsgiwrappers import WSGIRequest, WSGIResponse
+import six
+
+class AssertApp(object):  # minimal WSGI app that runs an assertion callback on each request's environ
+    def __init__(self, assertfunc):  # assertfunc: callable invoked with the WSGI environ
+        self.assertfunc = assertfunc
+
+    def __call__(self, environ, start_response):  # WSGI entry point
+        start_response('200 OK', [('Content-type','text/plain')])
+        self.assertfunc(environ)  # not wrapped in try/except: a failed assertion propagates out of the app
+        return ['Passed']
+
+no_encoding = object()  # sentinel: tells "no encoding argument given" apart from any real value (compared with `is`)
+def valid_name(name, encoding=no_encoding, post=False):  # returns a callback asserting the request's 'name' parameter equals `name`
+    def assert_valid_name(environ):
+        if encoding is not no_encoding:  # override the WSGIRequest defaults only when an encoding was passed
+            WSGIRequest.defaults._push_object(dict(content_type='text/html',
+                                                   charset=encoding))
+        try:
+            request = WSGIRequest(environ)
+            if post:  # post=True checks request.POST, otherwise request.GET
+                params = request.POST
+            else:
+                params = request.GET
+            assert params['name'] == name
+            assert request.params['name'] == name  # request.params must yield the same value
+        finally:
+            if encoding is not no_encoding:  # undo the push above even when an assertion failed
+                WSGIRequest.defaults._pop_object()
+    return assert_valid_name
+
+def test_wsgirequest_charset():  # each case builds a fresh TestApp whose AssertApp checks the decoded 'name' parameter
+    # Jose, 'José'
+    app = TestApp(AssertApp(assertfunc=valid_name(u'José', encoding='UTF-8')))
+    res = app.get('/?name=Jos%C3%A9')  # %C3%A9 is the percent-encoded UTF-8 form of 'é'
+
+    # Tanaka, '田中'
+    app = TestApp(AssertApp(assertfunc=valid_name(u'田中', encoding='UTF-8')))
+    res = app.get('/?name=%E7%94%B0%E4%B8%AD')  # percent-encoded UTF-8 bytes of '田中'
+
+    # Nippon (Japan), '日本'
+    app = TestApp(AssertApp(assertfunc=valid_name(u'日本', encoding='UTF-8',
+                                                  post=True)))
+    res = app.post('/', params=dict(name='日本'))
+
+    # WSGIRequest will determine the charset from the Content-Type header when
+    # unicode is expected.
+    # No encoding specified: not expecting unicode
+    app = TestApp(AssertApp(assertfunc=valid_name('日本', post=True)))
+    content_type = 'application/x-www-form-urlencoded; charset=%s'  # template reused by the next two posts
+    res = app.post('/', params=dict(name='日本'),
+                   headers={'content-type': content_type % 'UTF-8'})
+
+    # Encoding specified: expect unicode. Shiftjis is the default encoding, but
+    # params are decoded as UTF-8 because the Content-Type header says so
+    app = TestApp(AssertApp(assertfunc=valid_name(u'日本', post=True,
+                                                  encoding='shiftjis')))
+    res = app.post('/', params=dict(name='日本'),
+                   headers={'content-type': content_type % 'UTF-8'})
+
+    # No charset in the request headers: parse params as the fallback shiftjis
+    app = TestApp(AssertApp(assertfunc=valid_name(u'日本', post=True,
+                                                  encoding='shiftjis')))
+    res = app.post('/', params=dict(name=u'日本'.encode('shiftjis')))
+
+def test_wsgirequest_charset_fileupload():  # checks the uploaded file's filename type before and after request.charset is assigned
+    def handle_fileupload(environ, start_response):  # WSGI app that performs the assertions itself
+        start_response('200 OK', [('Content-type','text/plain')])
+        request = WSGIRequest(environ)
+
+        assert len(request.POST) == 1  # first pass: before request.charset is assigned
+        assert isinstance(request.POST.keys()[0], str)
+        fs = request.POST['thefile']
+        assert isinstance(fs, cgi.FieldStorage)
+        assert isinstance(fs.filename, str)  # filename is a native str at this point
+        assert fs.filename == '寿司.txt'
+        assert fs.value == b'Sushi'
+
+        request.charset = 'UTF-8'  # second pass: same checks, but the filename must now be six.text_type
+        assert len(request.POST) == 1
+        assert isinstance(request.POST.keys()[0], str)
+        fs = request.POST['thefile']
+        assert isinstance(fs, cgi.FieldStorage)
+        assert isinstance(fs.filename, six.text_type)
+        assert fs.filename == u'寿司.txt'
+        assert fs.value == b'Sushi'
+
+        request.charset = None
+        assert fs.value == b'Sushi'  # fs was fetched before the charset was reset; its bytes are unchanged
+        return []
+
+    app = TestApp(handle_fileupload)
+    res = app.post('/', upload_files=[('thefile', '寿司.txt', b'Sushi')])  # (field name, filename, content)
+
+def test_wsgiresponse_charset():  # checks the type of WSGIResponse content chunks under different charset settings
+    response = WSGIResponse(mimetype='text/html; charset=UTF-8')  # charset supplied inside the mimetype string
+    assert response.content_type == 'text/html'
+    assert response.charset == 'UTF-8'
+    response.write(u'test')
+    response.write(u'test2')
+    response.write('test3')  # native str mixed in with the u'' writes
+    status, headers, content = response.wsgi_response()
+    for data in content:
+        assert isinstance(data, six.binary_type)  # with a charset, every chunk must be bytes
+
+    WSGIResponse.defaults._push_object(dict(content_type='text/html',  # same checks, charset supplied via the class defaults
+                                            charset='iso-8859-1'))
+    try:
+        response = WSGIResponse()
+        response.write(u'test')
+        response.write(u'test2')
+        response.write('test3')
+        status, headers, content = response.wsgi_response()
+        for data in content:
+            assert isinstance(data, six.binary_type)
+    finally:
+        WSGIResponse.defaults._pop_object()  # restore the defaults even if an assertion failed
+
+    # WSGIResponse will allow unicode to pass through when no charset is
+    # set
+    WSGIResponse.defaults._push_object(dict(content_type='text/html',
+                                            charset=None))
+    try:
+        response = WSGIResponse(u'test')
+        response.write(u'test1')
+        status, headers, content = response.wsgi_response()
+        for data in content:
+            assert isinstance(data, six.text_type)  # chunks stay text when charset is None
+    finally:
+        WSGIResponse.defaults._pop_object()
+
+    WSGIResponse.defaults._push_object(dict(content_type='text/html',  # empty-string charset: same expectation as charset=None
+                                            charset=''))
+    try:
+        response = WSGIResponse(u'test')
+        response.write(u'test1')
+        status, headers, content = response.wsgi_response()
+        for data in content:
+            assert isinstance(data, six.text_type)
+    finally:
+        WSGIResponse.defaults._pop_object()
author	Marc Abramowitz <marc@marc-abramowitz.com>	2015-04-30 17:39:24 -0700
committer	Marc Abramowitz <marc@marc-abramowitz.com>	2015-04-30 17:39:24 -0700
commit	fa100c92c06d3a8a61a0dda1a2e06018437b09c6 (patch)
tree	a1cc50f93fbf257685c3849e03496c5e33949281 /tests/test_wsgiwrappers.py
download	paste-git-fa100c92c06d3a8a61a0dda1a2e06018437b09c6.tar.gz