diff options
author | Colin Watson <cjwatson@debian.org> | 2020-08-17 18:38:33 +0100 |
---|---|---|
committer | Colin Watson <cjwatson@debian.org> | 2020-08-17 18:38:33 +0100 |
commit | a2cb8bc70a12ad89a62a010fbe1569d21eed21d5 (patch) | |
tree | f416b7c2af02dd04f9b3c70fc1b06c063082d65f | |
parent | d093a0919198eb53826ae5753e517af10ad95d5b (diff) | |
download | openid-a2cb8bc70a12ad89a62a010fbe1569d21eed21d5.tar.gz |
Fix normalization of non-ASCII query strings on Python 2
urinorm currently deals with encoding issues when normalizing the path,
but not the query string. However, in some cases it can happen that the
query string contains non-ASCII characters, particularly when using the
OpenID Simple Registration Extension
(https://openid.net/specs/openid-simple-registration-extension-1_0.html),
in which case the user's full name may very well not be entirely ASCII;
on Python 2 this resulted in a UnicodeEncodeError in urlencode. Work
around this.
-rw-r--r-- | openid/test/test_urinorm.py | 8 | ||||
-rw-r--r-- | openid/urinorm.py | 10 |
2 files changed, 16 insertions, 2 deletions
diff --git a/openid/test/test_urinorm.py b/openid/test/test_urinorm.py
index 53debfe..e85969b 100644
--- a/openid/test/test_urinorm.py
+++ b/openid/test/test_urinorm.py
@@ -82,6 +82,14 @@ class UrinormTest(unittest.TestCase):
     def test_path_percent_decode_sub_delims(self):
         self.assertEqual(urinorm('http://example.com/foo%2B%21bar'), 'http://example.com/foo+!bar')
 
+    def test_query_encoding(self):
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Unícöde+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+
     def test_illegal_characters(self):
         six.assertRaisesRegex(self, ValueError, 'Illegal characters in URI', urinorm, 'http://<illegal>.com/')
 
diff --git a/openid/urinorm.py b/openid/urinorm.py
index 9678741..22b3dad 100644
--- a/openid/urinorm.py
+++ b/openid/urinorm.py
@@ -132,8 +132,14 @@ def urinorm(uri):
         path = '/'
     _check_disallowed_characters(path, 'path')
 
-    # Normalize query
-    data = parse_qsl(split_uri.query)
+    # Normalize query. On Python 2, `urlencode` without `doseq=True`
+    # requires values to be convertible to native strings using `str()`.
+    if isinstance(split_uri.query, str):
+        # Python 3 branch
+        data = parse_qsl(split_uri.query)
+    else:
+        # Python 2 branch
+        data = parse_qsl(split_uri.query.encode('utf-8'))
     query = urlencode(data)
     _check_disallowed_characters(query, 'query')
 