summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Colin Watson <cjwatson@debian.org>, 2020-08-17 18:38:33 +0100
committer: Colin Watson <cjwatson@debian.org>, 2020-08-17 18:38:33 +0100
commit: a2cb8bc70a12ad89a62a010fbe1569d21eed21d5 (patch)
tree: f416b7c2af02dd04f9b3c70fc1b06c063082d65f
parent: d093a0919198eb53826ae5753e517af10ad95d5b (diff)
download: openid-a2cb8bc70a12ad89a62a010fbe1569d21eed21d5.tar.gz
Fix normalization of non-ASCII query strings on Python 2
urinorm currently deals with encoding issues when normalizing the path, but not when normalizing the query string. However, the query string can contain non-ASCII characters, particularly when using the OpenID Simple Registration Extension (https://openid.net/specs/openid-simple-registration-extension-1_0.html), in which case the user's full name may very well not be entirely ASCII. On Python 2 this resulted in a UnicodeEncodeError in urlencode. Work around this by encoding a non-native (unicode) query string to UTF-8 before parsing it.
-rw-r--r-- openid/test/test_urinorm.py | 8
-rw-r--r-- openid/urinorm.py | 10
2 files changed, 16 insertions, 2 deletions
diff --git a/openid/test/test_urinorm.py b/openid/test/test_urinorm.py
index 53debfe..e85969b 100644
--- a/openid/test/test_urinorm.py
+++ b/openid/test/test_urinorm.py
@@ -82,6 +82,14 @@ class UrinormTest(unittest.TestCase):
def test_path_percent_decode_sub_delims(self):
self.assertEqual(urinorm('http://example.com/foo%2B%21bar'), 'http://example.com/foo+!bar')
+ def test_query_encoding(self):
+ self.assertEqual(
+ urinorm('http://example.com/?openid.sreg.fullname=Unícöde+Person'),
+ 'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+ self.assertEqual(
+ urinorm('http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person'),
+ 'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+
def test_illegal_characters(self):
six.assertRaisesRegex(self, ValueError, 'Illegal characters in URI', urinorm, 'http://<illegal>.com/')
diff --git a/openid/urinorm.py b/openid/urinorm.py
index 9678741..22b3dad 100644
--- a/openid/urinorm.py
+++ b/openid/urinorm.py
@@ -132,8 +132,14 @@ def urinorm(uri):
path = '/'
_check_disallowed_characters(path, 'path')
- # Normalize query
- data = parse_qsl(split_uri.query)
+ # Normalize query. On Python 2, `urlencode` without `doseq=True`
+ # requires values to be convertible to native strings using `str()`.
+ if isinstance(split_uri.query, str):
+ # Python 3 branch
+ data = parse_qsl(split_uri.query)
+ else:
+ # Python 2 branch
+ data = parse_qsl(split_uri.query.encode('utf-8'))
query = urlencode(data)
_check_disallowed_characters(query, 'query')