diff options
author | Vlastimil Zíma <ziima@users.noreply.github.com> | 2020-09-02 10:21:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-02 10:21:22 +0200 |
commit | afa6adacbe1a41d8f614c8bce2264dfbe9e76489 (patch) | |
tree | f416b7c2af02dd04f9b3c70fc1b06c063082d65f | |
parent | d093a0919198eb53826ae5753e517af10ad95d5b (diff) | |
parent | a2cb8bc70a12ad89a62a010fbe1569d21eed21d5 (diff) | |
download | openid-master.tar.gz |
Fix normalization of non-ASCII query strings on Python 2
-rw-r--r-- | openid/test/test_urinorm.py | 8 | ||||
-rw-r--r-- | openid/urinorm.py | 10 |
2 files changed, 16 insertions, 2 deletions
diff --git a/openid/test/test_urinorm.py b/openid/test/test_urinorm.py
index 53debfe..e85969b 100644
--- a/openid/test/test_urinorm.py
+++ b/openid/test/test_urinorm.py
@@ -82,6 +82,14 @@ class UrinormTest(unittest.TestCase):
     def test_path_percent_decode_sub_delims(self):
         self.assertEqual(urinorm('http://example.com/foo%2B%21bar'), 'http://example.com/foo+!bar')
 
+    def test_query_encoding(self):
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Unícöde+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+
     def test_illegal_characters(self):
         six.assertRaisesRegex(self, ValueError, 'Illegal characters in URI', urinorm, 'http://<illegal>.com/')
 
diff --git a/openid/urinorm.py b/openid/urinorm.py
index 9678741..22b3dad 100644
--- a/openid/urinorm.py
+++ b/openid/urinorm.py
@@ -132,8 +132,14 @@ def urinorm(uri):
         path = '/'
     _check_disallowed_characters(path, 'path')
 
-    # Normalize query
-    data = parse_qsl(split_uri.query)
+    # Normalize query. On Python 2, `urlencode` without `doseq=True`
+    # requires values to be convertible to native strings using `str()`.
+    if isinstance(split_uri.query, str):
+        # Python 3 branch
+        data = parse_qsl(split_uri.query)
+    else:
+        # Python 2 branch
+        data = parse_qsl(split_uri.query.encode('utf-8'))
     query = urlencode(data)
     _check_disallowed_characters(query, 'query')