From 89502c4f5a0db140d135c89d2332e6bb1203590f Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Thu, 30 Apr 2015 17:39:24 -0700 Subject: test_wsgirequest_charset: Use UTF-8 instead of iso-8859-1 because it seems that the de facto standard for encoding URIs is to use UTF-8. I've been reading about URL encoding and it seems like perhaps using an encoding other than UTF-8 is very non-standard and not well-supported (this test is trying to use `iso-8859-1`). From http://en.wikipedia.org/wiki/Percent-encoding > For a non-ASCII character, it is typically converted to its byte sequence in > UTF-8, and then each byte value is represented as above. > The generic URI syntax mandates that new URI schemes that provide for the > representation of character data in a URI must, in effect, represent > characters from the unreserved set without translation, and should convert > all other characters to bytes according to UTF-8, and then percent-encode > those values. This requirement was introduced in January 2005 with the > publication of RFC 3986 From http://tools.ietf.org/html/rfc3986: > Non-ASCII characters must first be encoded according to UTF-8 [STD63], and > then each octet of the corresponding UTF-8 sequence must be percent-encoded > to be represented as URI characters. URI producing applications must not use > percent-encoding in host unless it is used to represent a UTF-8 character > sequence. From http://tools.ietf.org/html/rfc3987: > Conversions from URIs to IRIs MUST NOT use any character encoding other than > UTF-8 in steps 3 and 4, even if it might be possible to guess from the > context that another character encoding than UTF-8 was used in the URI. For > example, the URI "http://www.example.org/r%E9sum%E9.html" might with some > guessing be interpreted to contain two e-acute characters encoded as > iso-8859-1. It must not be converted to an IRI containing these e-acute > characters. 
Otherwise, in the future the IRI will be mapped to > "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different URI from > "http://www.example.org/r%E9sum%E9.html". See issue #7, which I think this at least partially fixes. --- tests/test_wsgiwrappers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_wsgiwrappers.py b/tests/test_wsgiwrappers.py index 833b4f2..8719693 100644 --- a/tests/test_wsgiwrappers.py +++ b/tests/test_wsgiwrappers.py @@ -36,9 +36,8 @@ def valid_name(name, encoding=no_encoding, post=False): def test_wsgirequest_charset(): # Jose, 'José' - app = TestApp(AssertApp(assertfunc=valid_name(u'José', - encoding='iso-8859-1'))) - res = app.get('/?name=Jos%E9') + app = TestApp(AssertApp(assertfunc=valid_name(u'José', encoding='UTF-8'))) + res = app.get('/?name=Jos%C3%A9') # Tanaka, '田中' app = TestApp(AssertApp(assertfunc=valid_name(u'田中', encoding='UTF-8'))) -- cgit v1.2.1