Fix normalization of non-ASCII query strings on Python 2

urinorm currently deals with encoding issues when normalizing the path, but not the query string. However, the query string can contain non-ASCII characters, particularly when using the OpenID Simple Registration Extension (https://openid.net/specs/openid-simple-registration-extension-1_0.html), in which case the user's full name may well not be entirely ASCII. On Python 2 this resulted in a UnicodeEncodeError in urlencode. Work around this by encoding the query string as UTF-8 before parsing it.
author: Colin Watson <cjwatson@debian.org> 2020-08-17 18:38:33 +0100
committer: Colin Watson <cjwatson@debian.org> 2020-08-17 18:38:33 +0100
commit: a2cb8bc70a12ad89a62a010fbe1569d21eed21d5 (patch)
tree: f416b7c2af02dd04f9b3c70fc1b06c063082d65f
parent: d093a0919198eb53826ae5753e517af10ad95d5b (diff)
download: openid-a2cb8bc70a12ad89a62a010fbe1569d21eed21d5.tar.gz
2 files changed, 16 insertions, 2 deletions
diff --git a/openid/test/test_urinorm.py b/openid/test/test_urinorm.py
index 53debfe..e85969b 100644
--- a/openid/test/test_urinorm.py
+++ b/openid/test/test_urinorm.py
@@ -82,6 +82,14 @@ class UrinormTest(unittest.TestCase):
     def test_path_percent_decode_sub_delims(self):
         self.assertEqual(urinorm('http://example.com/foo%2B%21bar'), 'http://example.com/foo+!bar')
 
+    def test_query_encoding(self):
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Unícöde+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+
     def test_illegal_characters(self):
         six.assertRaisesRegex(self, ValueError, 'Illegal characters in URI', urinorm, 'http://<illegal>.com/')
 
diff --git a/openid/urinorm.py b/openid/urinorm.py
index 9678741..22b3dad 100644
--- a/openid/urinorm.py
+++ b/openid/urinorm.py
@@ -132,8 +132,14 @@ def urinorm(uri):
         path = '/'
     _check_disallowed_characters(path, 'path')
 
-    # Normalize query
-    data = parse_qsl(split_uri.query)
+    # Normalize query.  On Python 2, `urlencode` without `doseq=True`
+    # requires values to be convertible to native strings using `str()`.
+    if isinstance(split_uri.query, str):
+        # Python 3 branch
+        data = parse_qsl(split_uri.query)
+    else:
+        # Python 2 branch
+        data = parse_qsl(split_uri.query.encode('utf-8'))
     query = urlencode(data)
     _check_disallowed_characters(query, 'query')
author	Colin Watson <cjwatson@debian.org>	2020-08-17 18:38:33 +0100
committer	Colin Watson <cjwatson@debian.org>	2020-08-17 18:38:33 +0100
commit	a2cb8bc70a12ad89a62a010fbe1569d21eed21d5 (patch)
tree	f416b7c2af02dd04f9b3c70fc1b06c063082d65f
parent	d093a0919198eb53826ae5753e517af10ad95d5b (diff)
download	openid-a2cb8bc70a12ad89a62a010fbe1569d21eed21d5.tar.gz