From 9817ec3e47bca8fba9a7cac56d785e9d644f7473 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Mon, 30 Jun 2014 23:17:28 +0000 Subject: Imported from /home/lorry/working-area/delta_python-packages_python-rfc3986/rfc3986-0.2.0.tar.gz. --- AUTHORS.rst | 9 + HISTORY.rst | 12 + LICENSE | 13 ++ MANIFEST.in | 8 + PKG-INFO | 181 +++++++++++++++ README.rst | 147 ++++++++++++ rfc3986.egg-info/PKG-INFO | 181 +++++++++++++++ rfc3986.egg-info/SOURCES.txt | 23 ++ rfc3986.egg-info/dependency_links.txt | 1 + rfc3986.egg-info/not-zip-safe | 1 + rfc3986.egg-info/top_level.txt | 1 + rfc3986/__init__.py | 36 +++ rfc3986/api.py | 78 +++++++ rfc3986/compat.py | 31 +++ rfc3986/exceptions.py | 9 + rfc3986/misc.py | 197 ++++++++++++++++ rfc3986/normalizers.py | 112 ++++++++++ rfc3986/uri.py | 256 +++++++++++++++++++++ setup.cfg | 5 + setup.py | 53 +++++ tests/conftest.py | 4 + tests/test_api.py | 16 ++ tests/test_normalizers.py | 72 ++++++ tests/test_unicode_support.py | 26 +++ tests/test_uri.py | 408 ++++++++++++++++++++++++++++++++++ 25 files changed, 1880 insertions(+) create mode 100644 AUTHORS.rst create mode 100644 HISTORY.rst create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 PKG-INFO create mode 100644 README.rst create mode 100644 rfc3986.egg-info/PKG-INFO create mode 100644 rfc3986.egg-info/SOURCES.txt create mode 100644 rfc3986.egg-info/dependency_links.txt create mode 100644 rfc3986.egg-info/not-zip-safe create mode 100644 rfc3986.egg-info/top_level.txt create mode 100644 rfc3986/__init__.py create mode 100644 rfc3986/api.py create mode 100644 rfc3986/compat.py create mode 100644 rfc3986/exceptions.py create mode 100644 rfc3986/misc.py create mode 100644 rfc3986/normalizers.py create mode 100644 rfc3986/uri.py create mode 100644 setup.cfg create mode 100755 setup.py create mode 100644 tests/conftest.py create mode 100644 tests/test_api.py create mode 100644 tests/test_normalizers.py create mode 100644 tests/test_unicode_support.py create mode 
100644 tests/test_uri.py diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000..f1a70ed --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,9 @@ +Development Lead +---------------- + +- Ian Cordasco + +Contributors +------------ + +None yet! diff --git a/HISTORY.rst b/HISTORY.rst new file mode 100644 index 0000000..fc764f4 --- /dev/null +++ b/HISTORY.rst @@ -0,0 +1,12 @@ +0.2.0 -- 2014-06-30 +------------------- + +- Add support for requiring components during validation. This includes adding + parameters ``require_scheme``, ``require_authority``, ``require_path``, + ``require_query``, and ``require_fragment`` to + ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. + +0.1.0 -- 2014-06-27 +------------------- + +- Initial Release includes validation and normalization of URIs diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..72ce24c --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright 2014 Ian Cordasco, Rackspace + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9d51b59 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include README.rst +include LICENSE +include HISTORY.rst +include AUTHORS.rst +prune *.pyc +#recursive-include docs *.rst *.py Makefile +recursive-include tests *.py +prune docs/_build diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..0f37aa6 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,181 @@ +Metadata-Version: 1.1 +Name: rfc3986 +Version: 0.2.0 +Summary: Validating URI References per RFC 3986 +Home-page: https://rfc3986.rtfd.org +Author: Ian Cordasco +Author-email: ian.cordasco@rackspace.com +License: Apache 2.0 +Description: rfc3986 + ======= + + A Python implementation of `RFC 3986`_ including validation and authority + parsing. Coming soon: `Reference Resolution `_. + + Installation + ------------ + + Simply use pip to install ``rfc3986`` like so:: + + pip install rfc3986 + + License + ------- + + `Apache License Version 2.0`_ + + Example Usage + ------------- + + To parse a URI into a convenient named tuple, you can simply:: + + from rfc3986 import uri_reference + + example = uri_reference('http://example.com') + email = uri_reference('mailto:user@domain.com') + ssh = uri_reference('ssh://user@git.openstack.org:29418/openstack/keystone.git') + + With a parsed URI you can access data about the components:: + + print(example.scheme) # => http + print(email.path) # => user@domain.com + print(ssh.userinfo) # => user + print(ssh.host) # => git.openstack.org + print(ssh.port) # => 29418 + + It can also parse URIs with unicode present:: + + uni = uri_reference(b'http://httpbin.org/get?utf8=\xe2\x98\x83') # ☃ + print(uni.query) # utf8=%E2%98%83 + + With a parsed URI you can also validate it:: + + if ssh.is_valid(): + subprocess.call(['git', 'clone', ssh.unsplit()]) + + You can also take a parsed URI and normalize it:: + + mangled = uri_reference('hTTp://exAMPLe.COM') + print(mangled.scheme) # => hTTp + 
print(mangled.authority) # => exAMPLe.COM + + normal = mangled.normalize() + print(normal.scheme) # => http + print(mangled.authority) # => example.com + + But these two URIs are (functionally) equivalent:: + + if normal == mangled: + webbrowser.open(normal.unsplit()) + + Your paths, queries, and fragments are safe with us though:: + + mangled = uri_reference('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') + normal = mangled.normalize() + assert normal == 'hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth' + assert normal == 'http://example.com/Some/reallY/biZZare/pAth' + assert normal != 'http://example.com/some/really/bizzare/path' + + If you do not actually need a real reference object and just want to normalize + your URI:: + + from rfc3986 import normalize_uri + + assert (normalize_uri('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') == + 'http://example.com/Some/reallY/biZZare/pAth') + + You can also very simply validate a URI:: + + from rfc3986 import is_valid_uri + + assert is_valid_uri('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') + + Requiring Components + ~~~~~~~~~~~~~~~~~~~~ + + You can validate that a particular string is a valid URI and require + independent components:: + + from rfc3986 import is_valid_uri + + assert is_valid_uri('http://localhost:8774/v2/resource', + require_scheme=True, + require_authority=True, + require_path=True) + + # Assert that a mailto URI is invalid if you require an authority + # component + assert is_valid_uri('mailto:user@example.com', require_authority=True) is False + + If you have an instance of a ``URIReference``, you can pass the same arguments + to ``URIReference#is_valid``, e.g., + + .. 
code:: + + from rfc3986 import uri_reference + + http = uri_reference('http://localhost:8774/v2/resource') + assert uri.is_valid(require_scheme=True, + require_authority=True, + require_path=True) + + # Assert that a mailto URI is invalid if you require an authority + # component + mailto = uri_reference('mailto:user@example.com') + assert uri.is_valid(require_authority=True) is False + + Alternatives + ------------ + + - `rfc3987 `_ + + This is a direct competitor to this library, with extra features, + licensed under the GPL. + + - `uritools `_ + + This can parse URIs in the manner of RFC 3986 but provides no validation and + only recently added Python 3 support. + + - Standard library's `urlparse`/`urllib.parse` + + The functions in these libraries can only split a URI (valid or not) and + provide no validation. + + Contributing + ------------ + + This project follows and enforces the Python Software Foundation's `Code of + Conduct `_. + + If you would like to contribute but do not have a bug or feature in mind, feel + free to email Ian and find out how you can help. + + .. _RFC 3986: http://tools.ietf.org/html/rfc3986 + .. _Apache License Version 2.0: https://www.apache.org/licenses/LICENSE-2.0 + + + 0.2.0 -- 2014-06-30 + ------------------- + + - Add support for requiring components during validation. This includes adding + parameters ``require_scheme``, ``require_authority``, ``require_path``, + ``require_path``, ``require_query``, and ``require_fragment`` to + ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. 
+ + 0.1.0 -- 2014-06-27 + ------------------- + + - Initial Release includes validation and normalization of URIs + +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..25e55bd --- /dev/null +++ b/README.rst @@ -0,0 +1,147 @@ +rfc3986 +======= + +A Python implementation of `RFC 3986`_ including validation and authority +parsing. Coming soon: `Reference Resolution `_. + +Installation +------------ + +Simply use pip to install ``rfc3986`` like so:: + + pip install rfc3986 + +License +------- + +`Apache License Version 2.0`_ + +Example Usage +------------- + +To parse a URI into a convenient named tuple, you can simply:: + + from rfc3986 import uri_reference + + example = uri_reference('http://example.com') + email = uri_reference('mailto:user@domain.com') + ssh = uri_reference('ssh://user@git.openstack.org:29418/openstack/keystone.git') + +With a parsed URI you can access data about the components:: + + print(example.scheme) # => http + print(email.path) # => user@domain.com + print(ssh.userinfo) # => user + print(ssh.host) # => git.openstack.org + print(ssh.port) # => 29418 + +It can also parse URIs with unicode present:: + + uni = uri_reference(b'http://httpbin.org/get?utf8=\xe2\x98\x83') # ☃ + print(uni.query) # utf8=%E2%98%83 + +With a parsed URI you can also validate it:: + + if ssh.is_valid(): + subprocess.call(['git', 'clone', ssh.unsplit()]) + +You can also take a parsed URI and normalize it:: + + mangled = 
uri_reference('hTTp://exAMPLe.COM') + print(mangled.scheme) # => hTTp + print(mangled.authority) # => exAMPLe.COM + + normal = mangled.normalize() + print(normal.scheme) # => http + print(normal.authority) # => example.com + +But these two URIs are (functionally) equivalent:: + + if normal == mangled: + webbrowser.open(normal.unsplit()) + +Your paths, queries, and fragments are safe with us though:: + + mangled = uri_reference('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') + normal = mangled.normalize() + assert normal == 'hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth' + assert normal == 'http://example.com/Some/reallY/biZZare/pAth' + assert normal != 'http://example.com/some/really/bizzare/path' + +If you do not actually need a real reference object and just want to normalize +your URI:: + + from rfc3986 import normalize_uri + + assert (normalize_uri('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') == + 'http://example.com/Some/reallY/biZZare/pAth') + +You can also very simply validate a URI:: + + from rfc3986 import is_valid_uri + + assert is_valid_uri('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') + +Requiring Components +~~~~~~~~~~~~~~~~~~~~ + +You can validate that a particular string is a valid URI and require +independent components:: + + from rfc3986 import is_valid_uri + + assert is_valid_uri('http://localhost:8774/v2/resource', + require_scheme=True, + require_authority=True, + require_path=True) + + # Assert that a mailto URI is invalid if you require an authority + # component + assert is_valid_uri('mailto:user@example.com', require_authority=True) is False + +If you have an instance of a ``URIReference``, you can pass the same arguments +to ``URIReference#is_valid``, e.g., + +.. 
code:: + + from rfc3986 import uri_reference + + http = uri_reference('http://localhost:8774/v2/resource') + assert http.is_valid(require_scheme=True, + require_authority=True, + require_path=True) + + # Assert that a mailto URI is invalid if you require an authority + # component + mailto = uri_reference('mailto:user@example.com') + assert mailto.is_valid(require_authority=True) is False + +Alternatives +------------ + +- `rfc3987 `_ + + This is a direct competitor to this library, with extra features, + licensed under the GPL. + +- `uritools `_ + + This can parse URIs in the manner of RFC 3986 but provides no validation and + only recently added Python 3 support. + +- Standard library's `urlparse`/`urllib.parse` + + The functions in these libraries can only split a URI (valid or not) and + provide no validation. + +Contributing +------------ + +This project follows and enforces the Python Software Foundation's `Code of +Conduct `_. + +If you would like to contribute but do not have a bug or feature in mind, feel +free to email Ian and find out how you can help. + +.. _RFC 3986: http://tools.ietf.org/html/rfc3986 +.. _Apache License Version 2.0: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/rfc3986.egg-info/PKG-INFO b/rfc3986.egg-info/PKG-INFO new file mode 100644 index 0000000..0f37aa6 --- /dev/null +++ b/rfc3986.egg-info/PKG-INFO @@ -0,0 +1,181 @@ +Metadata-Version: 1.1 +Name: rfc3986 +Version: 0.2.0 +Summary: Validating URI References per RFC 3986 +Home-page: https://rfc3986.rtfd.org +Author: Ian Cordasco +Author-email: ian.cordasco@rackspace.com +License: Apache 2.0 +Description: rfc3986 + ======= + + A Python implementation of `RFC 3986`_ including validation and authority + parsing. Coming soon: `Reference Resolution `_. 
+ + Installation + ------------ + + Simply use pip to install ``rfc3986`` like so:: + + pip install rfc3986 + + License + ------- + + `Apache License Version 2.0`_ + + Example Usage + ------------- + + To parse a URI into a convenient named tuple, you can simply:: + + from rfc3986 import uri_reference + + example = uri_reference('http://example.com') + email = uri_reference('mailto:user@domain.com') + ssh = uri_reference('ssh://user@git.openstack.org:29418/openstack/keystone.git') + + With a parsed URI you can access data about the components:: + + print(example.scheme) # => http + print(email.path) # => user@domain.com + print(ssh.userinfo) # => user + print(ssh.host) # => git.openstack.org + print(ssh.port) # => 29418 + + It can also parse URIs with unicode present:: + + uni = uri_reference(b'http://httpbin.org/get?utf8=\xe2\x98\x83') # ☃ + print(uni.query) # utf8=%E2%98%83 + + With a parsed URI you can also validate it:: + + if ssh.is_valid(): + subprocess.call(['git', 'clone', ssh.unsplit()]) + + You can also take a parsed URI and normalize it:: + + mangled = uri_reference('hTTp://exAMPLe.COM') + print(mangled.scheme) # => hTTp + print(mangled.authority) # => exAMPLe.COM + + normal = mangled.normalize() + print(normal.scheme) # => http + print(mangled.authority) # => example.com + + But these two URIs are (functionally) equivalent:: + + if normal == mangled: + webbrowser.open(normal.unsplit()) + + Your paths, queries, and fragments are safe with us though:: + + mangled = uri_reference('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') + normal = mangled.normalize() + assert normal == 'hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth' + assert normal == 'http://example.com/Some/reallY/biZZare/pAth' + assert normal != 'http://example.com/some/really/bizzare/path' + + If you do not actually need a real reference object and just want to normalize + your URI:: + + from rfc3986 import normalize_uri + + assert (normalize_uri('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') == + 
'http://example.com/Some/reallY/biZZare/pAth') + + You can also very simply validate a URI:: + + from rfc3986 import is_valid_uri + + assert is_valid_uri('hTTp://exAMPLe.COM/Some/reallY/biZZare/pAth') + + Requiring Components + ~~~~~~~~~~~~~~~~~~~~ + + You can validate that a particular string is a valid URI and require + independent components:: + + from rfc3986 import is_valid_uri + + assert is_valid_uri('http://localhost:8774/v2/resource', + require_scheme=True, + require_authority=True, + require_path=True) + + # Assert that a mailto URI is invalid if you require an authority + # component + assert is_valid_uri('mailto:user@example.com', require_authority=True) is False + + If you have an instance of a ``URIReference``, you can pass the same arguments + to ``URIReference#is_valid``, e.g., + + .. code:: + + from rfc3986 import uri_reference + + http = uri_reference('http://localhost:8774/v2/resource') + assert uri.is_valid(require_scheme=True, + require_authority=True, + require_path=True) + + # Assert that a mailto URI is invalid if you require an authority + # component + mailto = uri_reference('mailto:user@example.com') + assert uri.is_valid(require_authority=True) is False + + Alternatives + ------------ + + - `rfc3987 `_ + + This is a direct competitor to this library, with extra features, + licensed under the GPL. + + - `uritools `_ + + This can parse URIs in the manner of RFC 3986 but provides no validation and + only recently added Python 3 support. + + - Standard library's `urlparse`/`urllib.parse` + + The functions in these libraries can only split a URI (valid or not) and + provide no validation. + + Contributing + ------------ + + This project follows and enforces the Python Software Foundation's `Code of + Conduct `_. + + If you would like to contribute but do not have a bug or feature in mind, feel + free to email Ian and find out how you can help. + + .. _RFC 3986: http://tools.ietf.org/html/rfc3986 + .. 
_Apache License Version 2.0: https://www.apache.org/licenses/LICENSE-2.0 + + + 0.2.0 -- 2014-06-30 + ------------------- + + - Add support for requiring components during validation. This includes adding + parameters ``require_scheme``, ``require_authority``, ``require_path``, + ``require_path``, ``require_query``, and ``require_fragment`` to + ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. + + 0.1.0 -- 2014-06-27 + ------------------- + + - Initial Release includes validation and normalization of URIs + +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 diff --git a/rfc3986.egg-info/SOURCES.txt b/rfc3986.egg-info/SOURCES.txt new file mode 100644 index 0000000..b4da1a9 --- /dev/null +++ b/rfc3986.egg-info/SOURCES.txt @@ -0,0 +1,23 @@ +AUTHORS.rst +HISTORY.rst +LICENSE +MANIFEST.in +README.rst +setup.py +rfc3986/__init__.py +rfc3986/api.py +rfc3986/compat.py +rfc3986/exceptions.py +rfc3986/misc.py +rfc3986/normalizers.py +rfc3986/uri.py +rfc3986.egg-info/PKG-INFO +rfc3986.egg-info/SOURCES.txt +rfc3986.egg-info/dependency_links.txt +rfc3986.egg-info/not-zip-safe +rfc3986.egg-info/top_level.txt +tests/conftest.py +tests/test_api.py +tests/test_normalizers.py +tests/test_unicode_support.py +tests/test_uri.py \ No newline at end of file diff --git a/rfc3986.egg-info/dependency_links.txt b/rfc3986.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/rfc3986.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/rfc3986.egg-info/not-zip-safe 
b/rfc3986.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/rfc3986.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/rfc3986.egg-info/top_level.txt b/rfc3986.egg-info/top_level.txt new file mode 100644 index 0000000..af30258 --- /dev/null +++ b/rfc3986.egg-info/top_level.txt @@ -0,0 +1 @@ +rfc3986 diff --git a/rfc3986/__init__.py b/rfc3986/__init__.py new file mode 100644 index 0000000..57255d9 --- /dev/null +++ b/rfc3986/__init__.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +rfc3986 +======= + +An implementation of semantics and validations described in RFC 3986. See +http://rfc3986.rtfd.org/ for documentation. 
+ +:copyright: (c) 2014 Rackspace +:license: Apache v2.0, see LICENSE for details +""" + +__title__ = 'rfc3986' +__author__ = 'Ian Cordasco' +__author_email__ = 'ian.cordasco@rackspace.com' +__license__ = 'Apache v2.0' +__copyright__ = 'Copyright 2014 Rackspace' +__version__ = '0.2.0' + +from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri) + +__all__ = ['URIReference', 'uri_reference', 'is_valid_uri', 'normalize_uri'] diff --git a/rfc3986/api.py b/rfc3986/api.py new file mode 100644 index 0000000..c993bb8 --- /dev/null +++ b/rfc3986/api.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +rfc3986.api +~~~~~~~~~~~ + +This defines the simple API to rfc3986. This module defines 3 functions and +provides access to the class ``URIReference``. +""" + +from .uri import URIReference + + +def uri_reference(uri, encoding='utf-8'): + """Parse a URI string into a URIReference. + + This is a convenience function. You could achieve the same end by using + ``URIReference.from_string(uri)``. + + :param str uri: The URI which needs to be parsed into a reference. + :param str encoding: The encoding of the string provided + :returns: A parsed URI + :rtype: :class:`URIReference` + """ + return URIReference.from_string(uri, encoding) + + +def is_valid_uri(uri, encoding='utf-8', **kwargs): + """Determine if the URI given is valid. + + This is a convenience function. 
You could use either + ``uri_reference(uri).is_valid()`` or + ``URIReference.from_string(uri).is_valid()`` to achieve the same result. + + :param str uri: The URI to be validated. + :param str encoding: The encoding of the string provided + :param bool require_scheme: Set to ``True`` if you wish to require the + presence of the scheme component. + :param bool require_authority: Set to ``True`` if you wish to require the + presence of the authority component. + :param bool require_path: Set to ``True`` if you wish to require the + presence of the path component. + :param bool require_query: Set to ``True`` if you wish to require the + presence of the query component. + :param bool require_fragment: Set to ``True`` if you wish to require the + presence of the fragment component. + :returns: ``True`` if the URI is valid, ``False`` otherwise. + :rtype: bool + """ + return URIReference.from_string(uri, encoding).is_valid(**kwargs) + + +def normalize_uri(uri, encoding='utf-8'): + """Normalize the given URI. + + This is a convenience function. You could use either + ``uri_reference(uri).normalize().unsplit()`` or + ``URIReference.from_string(uri).normalize().unsplit()`` instead. + + :param str uri: The URI to be normalized. + :param str encoding: The encoding of the string provided + :returns: The normalized URI. + :rtype: str + """ + normalized_reference = URIReference.from_string(uri, encoding).normalize() + return normalized_reference.unsplit() diff --git a/rfc3986/compat.py b/rfc3986/compat.py new file mode 100644 index 0000000..6fc7f6d --- /dev/null +++ b/rfc3986/compat.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys + + +if sys.version_info >= (3, 0): + unicode = str # Python 3.x + + +def to_str(b, encoding): + if hasattr(b, 'decode') and not isinstance(b, unicode): + b = b.decode(encoding) + return b + + +def to_bytes(s, encoding): + if hasattr(s, 'encode') and not isinstance(s, bytes): + s = s.encode(encoding) + return s diff --git a/rfc3986/exceptions.py b/rfc3986/exceptions.py new file mode 100644 index 0000000..19ad191 --- /dev/null +++ b/rfc3986/exceptions.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +class RFC3986Exception(Exception): + pass + + +class InvalidAuthority(RFC3986Exception): + def __init__(self, authority): + super(InvalidAuthority, self).__init__( + "The authority ({0}) is not valid.".format(authority)) diff --git a/rfc3986/misc.py b/rfc3986/misc.py new file mode 100644 index 0000000..d6205f6 --- /dev/null +++ b/rfc3986/misc.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +rfc3986.misc +~~~~~~~~~~~~ + +This module contains important constants, patterns, and compiled regular +expressions for parsing and validating URIs and their components. +""" + +import re + +# These are enumerated for the named tuple used as a superclass of +# URIReference +URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment'] + +important_characters = { + 'generic_delimiters': ":/?#[]@", + 'sub_delimiters': "!$&'()*+,;=", + # We need to escape the '*' in this case + 're_sub_delimiters': "!$&'()\*+,;=", + 'unreserved_chars': ('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' + '0123456789._~-'), + # We need to escape the '-' in this case: + 're_unreserved': 'A-Za-z0-9._~\-', + } +# For details about delimiters and reserved characters, see: +# http://tools.ietf.org/html/rfc3986#section-2.2 +GENERIC_DELIMITERS = set(important_characters['generic_delimiters']) +SUB_DELIMITERS = set(important_characters['sub_delimiters']) +RESERVED_CHARS = GENERIC_DELIMITERS.union(SUB_DELIMITERS) +# For details about unreserved characters, see: +# http://tools.ietf.org/html/rfc3986#section-2.3 +UNRESERVED_CHARS = set(important_characters['unreserved_chars']) +NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%') + +# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B +component_pattern_dict = { + 'scheme': '[^:/?#]+', + 'authority': '[^/?#]*', + 'path': '[^?#]*', + 'query': '[^#]*', + 'fragment': '.*', + } + +# See http://tools.ietf.org/html/rfc3986#appendix-B +# In this case, we name each of the important matches so we can use +# SRE_Match#groupdict to parse the values out if we so choose. This is also +# modified to ignore other matches that are not important to the parsing of +# the reference so we can also simply use SRE_Match#groups. +expression = ('(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?' + '(?P<path>{path})(?:\?(?P<query>{query}))?' + '(?:#(?P<fragment>{fragment}))?' 
+ ).format(**component_pattern_dict) + +URI_MATCHER = re.compile(expression) + +# ######################### +# Authority Matcher Section +# ######################### + +# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 +# The pattern for a regular name, e.g., www.google.com, api.github.com +reg_name = '[\w\d.]+' +# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, +ipv4 = '(\d{1,3}\.){3}\d{1,3}' +# Hexadecimal characters used in each piece of an IPv6 address +hexdig = '[0-9A-Fa-f]{1,4}' +# Least-significant 32 bits of an IPv6 address +ls32 = '({hex}:{hex}|{ipv4})'.format(hex=hexdig, ipv4=ipv4) +# Substitutions into the following patterns for IPv6 patterns defined +# http://tools.ietf.org/html/rfc3986#page-20 +subs = {'hex': hexdig, 'ls32': ls32} + +# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details +# about ABNF (Augmented Backus-Naur Form) use in the comments +variations = [ + # 6( h16 ":" ) ls32 + '(%(hex)s:){6}%(ls32)s' % subs, + # "::" 5( h16 ":" ) ls32 + '::(%(hex)s:){5}%(ls32)s' % subs, + # [ h16 ] "::" 4( h16 ":" ) ls32 + '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % subs, + # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % subs, + # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % subs, + # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % subs, + # [ *4( h16 ":" ) h16 ] "::" ls32 + '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % subs, + # [ *5( h16 ":" ) h16 ] "::" h16 + '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % subs, + # [ *6( h16 ":" ) h16 ] "::" + '((%(hex)s:){0,6}%(hex)s)?::' % subs, + ] + +ipv6 = '({0})'.format('|'.join('({0})'.format(v) for v in variations)) + +ipv_future = 'v[0-9A-Fa-f]+\.[%s]+' % ( + important_characters['re_unreserved'] + + important_characters['re_sub_delimiters'] + + ':') + +ip_literal = '\[({0}|{1})\]'.format(ipv6, ipv_future) + +# Pattern 
for matching the host piece of the authority +HOST_PATTERN = '({0}|{1}|{2})'.format(reg_name, ipv4, ip_literal) + +SUBAUTHORITY_MATCHER = re.compile(( + '^(?:(?P<userinfo>[A-Za-z0-9_.~\-%:]+)@)?' # userinfo + '(?P<host>{0}?)' # host + ':?(?P<port>\d+)?$' # port + ).format(HOST_PATTERN)) + + +# #################### +# Path Matcher Section +# #################### + +# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information +# about the path patterns defined below. + +# Percent encoded character values +pct_encoded = '%[A-Fa-f0-9]{2}' +pchar = ('([' + important_characters['re_unreserved'] + + important_characters['re_sub_delimiters'] + + ':@]|%s)' % pct_encoded) +segments = { + 'segment': pchar + '*', + # Non-zero length segment + 'segment-nz': pchar + '+', + # Non-zero length segment without ":" + 'segment-nz-nc': pchar.replace(':', '') + '+' + } + +# Path types taken from Section 3.3 (linked above) +path_empty = '^$' +path_rootless = '%(segment-nz)s(/%(segment)s)*' % segments +path_noscheme = '%(segment-nz-nc)s(/%(segment)s)*' % segments +path_absolute = '/(%s)?' 
% path_rootless +path_abempty = '(/%(segment)s)*' % segments + +# Matcher used to validate path components +PATH_MATCHER = re.compile('^(%s|%s|%s|%s|%s)$' % ( + path_abempty, path_absolute, path_noscheme, path_rootless, path_empty + )) + + +# ################################## +# Query and Fragment Matcher Section +# ################################## + +QUERY_MATCHER = re.compile( + '^([/?:@' + important_characters['re_unreserved'] + + important_characters['re_sub_delimiters'] + + ']|%s)*$' % pct_encoded) + +FRAGMENT_MATCHER = QUERY_MATCHER + +# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 +SCHEME_MATCHER = re.compile('^[A-Za-z][A-Za-z0-9+.\-]*$') + +# Relative reference matcher + +# See http://tools.ietf.org/html/rfc3986#section-4.2 for details +relative_part = '(//%s%s|%s|%s|%s)' % ( + component_pattern_dict['authority'], path_abempty, path_absolute, + path_noscheme, path_empty + ) + +RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % ( + relative_part, QUERY_MATCHER.pattern, FRAGMENT_MATCHER.pattern + )) + +# See http://tools.ietf.org/html/rfc3986#section-3 for definition +hier_part = '(//%s%s|%s|%s|%s)' % ( + component_pattern_dict['authority'], path_abempty, path_absolute, + path_rootless, path_empty + ) + +# See http://tools.ietf.org/html/rfc3986#section-4.3 +ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)$' % ( + component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern + )) diff --git a/rfc3986/normalizers.py b/rfc3986/normalizers.py new file mode 100644 index 0000000..d232093 --- /dev/null +++ b/rfc3986/normalizers.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .compat import to_bytes
from .misc import NON_PCT_ENCODED


def normalize_scheme(scheme):
    """Return ``scheme`` lower-cased."""
    return scheme.lower()


def normalize_authority(authority):
    """Rebuild an authority string from its normalized pieces.

    :param tuple authority: ``(userinfo, host, port)``; falsy members are
        left out of the result.
    :returns: ``[userinfo@]host[:port]`` with the percent-escapes of the
        userinfo upper-cased and the host lower-cased.
    :rtype: str
    """
    userinfo, host, port = authority
    result = ''
    if userinfo:
        result += normalize_percent_characters(userinfo) + '@'
    if host:
        result += host.lower()
    if port:
        result += ':' + port
    return result


def normalize_path(path):
    """Upper-case the percent-escapes in ``path`` and remove dot segments."""
    path = normalize_percent_characters(path)
    return remove_dot_segments(path)


def normalize_query(query):
    """Upper-case the percent-escapes in ``query``."""
    return normalize_percent_characters(query)


def normalize_fragment(fragment):
    """Upper-case the percent-escapes in ``fragment``."""
    return normalize_percent_characters(fragment)


PERCENT_MATCHER = re.compile('%[A-Fa-f0-9]{2}')


def _upper_match(match):
    """Return the text matched by ``match`` in upper case."""
    return match.group(0).upper()


def normalize_percent_characters(s):
    """All percent characters should be upper-cased.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.
    """
    # A single substitution pass replaces one ``str.replace`` scan per
    # distinct escape sequence.
    return PERCENT_MATCHER.sub(_upper_match, s)


def remove_dot_segments(s):
    """Remove ``.`` and ``..`` segments from the path ``s``.

    :param str s: The path to process.
    :returns: The path with the dot segments resolved.
    :rtype: str
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    segments = s.split('/')  # Turn the path into a list of segments
    output = []  # Initialize the variable to use to store output

    for segment in segments:
        # '.' is the current directory, so ignore it, it is superfluous
        if segment == '.':
            continue
        # Anything other than '..', should be appended to the output
        elif segment != '..':
            output.append(segment)
        # In this case segment == '..', if we can, we should pop the last
        # element
        elif output:
            output.pop()

    # If the path starts with '/' and the output is empty or the first string
    # is non-empty, restore the leading empty segment so the result keeps its
    # leading '/'
    if s.startswith('/') and (not output or output[0]):
        output.insert(0, '')

    # If the path ends with '/.' or '/..' ensure we add one more empty
    # string to add a trailing '/'
    if s.endswith(('/.', '/..')):
        output.append('')

    return '/'.join(output)


def encode_component(uri_component, encoding):
    """Percent-encode every byte of a component that may not appear raw.

    :param uri_component: The component to encode; ``None`` is returned
        unchanged.
    :param str encoding: The encoding used to turn the component into bytes.
    :returns: The component with each byte that is not an ASCII character
        listed in ``NON_PCT_ENCODED`` replaced by ``%XX``.
    """
    if uri_component is None:
        return uri_component

    uri_bytes = to_bytes(uri_component, encoding)

    encoded_uri = bytearray()

    for i in range(0, len(uri_bytes)):
        # Will return a single character bytestring on both Python 2 & 3
        byte = uri_bytes[i:i+1]
        byte_ord = ord(byte)
        if byte_ord < 128 and byte.decode() in NON_PCT_ENCODED:
            encoded_uri.extend(byte)
            continue
        # RFC 3986, section 2.1: producers should use upper-case hexadecimal
        # digits for percent-encodings.
        encoded_uri.extend('%{0:02X}'.format(byte_ord).encode())

    # Everything appended above is ASCII, so decode as ASCII rather than with
    # ``encoding`` (which would garble the result for codecs that are not
    # ASCII-compatible, such as UTF-16).
    return encoded_uri.decode('ascii')

# ---------------------------------------------------------------------------
# Patch metadata preserved from the original diff:
# diff --git a/rfc3986/uri.py b/rfc3986/uri.py
# new file mode 100644
# index 0000000..8df4cda
# --- /dev/null
# +++ b/rfc3986/uri.py
# @@ -0,0 +1,256 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple

from .compat import to_str
from .exceptions import InvalidAuthority
from .misc import (
    FRAGMENT_MATCHER, PATH_MATCHER, QUERY_MATCHER, SCHEME_MATCHER,
    SUBAUTHORITY_MATCHER, URI_MATCHER, URI_COMPONENTS
    )
from .normalizers import (
    encode_component, normalize_scheme, normalize_authority, normalize_path,
    normalize_query, normalize_fragment
    )


class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
    """Immutable URI reference split into its five components.

    Empty components are stored as ``None``.  Equality is defined per RFC
    3986, section 6.2: two references are equal when their components are
    identical or when their normalized forms are identical.
    """

    # A namedtuple subclass needs ``__slots__`` (not ``slots``) to avoid
    # creating a per-instance ``__dict__``.
    __slots__ = ()

    def __new__(cls, scheme, authority, path, query, fragment):
        # Collapse every falsy component ('' included) to None.
        return super(URIReference, cls).__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query or None,
            fragment or None)

    def __eq__(self, other):
        """Compare against a URIReference, a 5-tuple or a URI string.

        :raises TypeError: If ``other`` cannot be parsed as a URI string.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = URIReference(*other)
        elif not isinstance(other, URIReference):
            try:
                other_ref = URIReference.from_string(other)
            except TypeError:
                raise TypeError(
                    'Unable to compare URIReference() to {0}()'.format(
                        type(other).__name__))

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        naive_equality = tuple(self) == tuple(other_ref)
        return naive_equality or self.normalized_equality(other_ref)

    def __ne__(self, other):
        """Inverse of :meth:`__eq__`.

        Without this override ``!=`` resolves to ``tuple.__ne__``, which
        compares the raw components and therefore disagrees with ``==``.
        """
        return not self.__eq__(other)

    @classmethod
    def from_string(cls, uri_string, encoding='utf-8'):
        """Parse a URI reference from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`URIReference` or subclass thereof
        """
        uri_string = to_str(uri_string, encoding)

        split_uri = URI_MATCHER.match(uri_string).groupdict()
        # Build through ``cls`` so subclasses get instances of themselves.
        return cls(split_uri['scheme'], split_uri['authority'],
                   encode_component(split_uri['path'], encoding),
                   encode_component(split_uri['query'], encoding),
                   encode_component(split_uri['fragment'], encoding))

    def authority_info(self):
        """Returns a dictionary with the ``userinfo``, ``host``, and ``port``.

        If the authority is not valid, it will raise an ``InvalidAuthority``
        Exception.

        :returns:
            ``{'userinfo': 'username:password', 'host': 'www.example.com',
            'port': '80'}``
        :rtype: dict
        :raises InvalidAuthority: If the authority is not ``None`` and can not
            be parsed.
        """
        if not self.authority:
            return {'userinfo': None, 'host': None, 'port': None}

        match = SUBAUTHORITY_MATCHER.match(self.authority)

        if match is None:
            # In this case, we have an authority that was parsed from the URI
            # Reference, but it cannot be further parsed by our
            # SUBAUTHORITY_MATCHER. In this case it must not be a valid
            # authority.
            raise InvalidAuthority(self.authority)

        return match.groupdict()

    @property
    def host(self):
        """If present, a string representing the host."""
        try:
            authority = self.authority_info()
        except InvalidAuthority:
            return None
        return authority['host']

    @property
    def port(self):
        """If present, the port (as a string) extracted from the authority."""
        try:
            authority = self.authority_info()
        except InvalidAuthority:
            return None
        return authority['port']

    @property
    def userinfo(self):
        """If present, the userinfo extracted from the authority."""
        try:
            authority = self.authority_info()
        except InvalidAuthority:
            return None
        return authority['userinfo']

    def is_valid(self, **kwargs):
        """Determines if the URI is valid.

        :param bool require_scheme: Set to ``True`` if you wish to require the
            presence of the scheme component.
        :param bool require_authority: Set to ``True`` if you wish to require
            the presence of the authority component.
        :param bool require_path: Set to ``True`` if you wish to require the
            presence of the path component.
        :param bool require_query: Set to ``True`` if you wish to require the
            presence of the query component.
        :param bool require_fragment: Set to ``True`` if you wish to require
            the presence of the fragment component.
        :returns: ``True`` if the URI is valid. ``False`` otherwise.
        :rtype: bool
        """
        validators = [
            (self.scheme_is_valid, kwargs.get('require_scheme', False)),
            (self.authority_is_valid, kwargs.get('require_authority', False)),
            (self.path_is_valid, kwargs.get('require_path', False)),
            (self.query_is_valid, kwargs.get('require_query', False)),
            (self.fragment_is_valid, kwargs.get('require_fragment', False)),
        ]
        return all(v(r) for v, r in validators)

    def _is_valid(self, value, matcher, require):
        """Return whether ``value`` satisfies ``matcher``.

        A missing component (``None``) is valid only when it is not required.
        The result is always a real ``bool`` rather than a match object.
        """
        if value is None:
            return not require
        return matcher.match(value) is not None

    def authority_is_valid(self, require=False):
        """Determines if the authority component is valid.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the authority is valid. ``False`` otherwise.
        :rtype: bool
        """
        return self._is_valid(self.authority, SUBAUTHORITY_MATCHER, require)

    def scheme_is_valid(self, require=False):
        """Determines if the scheme component is valid.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the scheme is valid. ``False`` otherwise.
        :rtype: bool
        """
        return self._is_valid(self.scheme, SCHEME_MATCHER, require)

    def path_is_valid(self, require=False):
        """Determines if the path component is valid.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the path is valid. ``False`` otherwise.
        :rtype: bool
        """
        return self._is_valid(self.path, PATH_MATCHER, require)

    def query_is_valid(self, require=False):
        """Determines if the query component is valid.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the query is valid. ``False`` otherwise.
        :rtype: bool
        """
        return self._is_valid(self.query, QUERY_MATCHER, require)

    def fragment_is_valid(self, require=False):
        """Determines if the fragment component is valid.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the fragment is valid. ``False`` otherwise.
        :rtype: bool
        """
        return self._is_valid(self.fragment, FRAGMENT_MATCHER, require)

    def normalize(self):
        """Normalize this reference as described in Section 6.2.2

        This is not an in-place normalization. Instead this creates a new
        URIReference.

        :returns: A new reference object with normalized components.
        :rtype: URIReference
        """
        # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in
        # this method.
        try:
            info = self.authority_info()
        except InvalidAuthority:
            # An authority that cannot be split is carried over untouched.
            # Dropping it would make references with *different* invalid
            # authorities normalize to the same value and compare equal.
            authority = self.authority
        else:
            authority = normalize_authority(
                (info['userinfo'], info['host'], info['port']))

        return URIReference(normalize_scheme(self.scheme or ''),
                            authority,
                            normalize_path(self.path or ''),
                            normalize_query(self.query or ''),
                            normalize_fragment(self.fragment or ''))

    def normalized_equality(self, other_ref):
        """Compare this URIReference to another URIReference.

        :param URIReference other_ref: (required), The reference with which
            we're comparing.
        :returns: ``True`` if the references are equal, ``False`` otherwise.
        :rtype: bool
        """
        return tuple(self.normalize()) == tuple(other_ref.normalize())

    def unsplit(self):
        """Create a URI string from the components.

        :returns: The URI Reference reconstituted as a string.
        :rtype: str
        """
        # See http://tools.ietf.org/html/rfc3986#section-5.3
        result_list = []
        if self.scheme:
            result_list.extend([self.scheme, ':'])
        if self.authority:
            result_list.extend(['//', self.authority])
        if self.path:
            result_list.append(self.path)
        if self.query:
            result_list.extend(['?', self.query])
        if self.fragment:
            result_list.extend(['#', self.fragment])
        return ''.join(result_list)

# ---------------------------------------------------------------------------
# Patch metadata preserved from the original diff:
# diff --git a/setup.cfg b/setup.cfg
# new file mode 100644
# index 0000000..861a9f5
# --- /dev/null
# +++ b/setup.cfg
# @@ -0,0 +1,5 @@
# ---------------------------------------------------------------------------
# Contents of setup.cfg (INI, not Python), kept verbatim:
# [egg_info]
# tag_build =
# tag_date = 0
# tag_svn_revision = 0
#
# ---------------------------------------------------------------------------
# diff --git a/setup.py b/setup.py
# new file mode 100755
# index 0000000..28a4e26
# --- /dev/null
# +++ b/setup.py
# @@ -0,0 +1,53 @@
# ---------------------------------------------------------------------------
#!/usr/bin/env python

import os
import sys

import rfc3986

try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

if sys.argv[-1] == 'publish':
    os.system('python setup.py bdist_wheel sdist upload')
    sys.exit()

packages = [
    'rfc3986',
]

with open('README.rst') as f:
    readme = f.read()

with open('HISTORY.rst') as f:
    history = f.read()

setup(
    name='rfc3986',
    version=rfc3986.__version__,
    description='Validating URI References per RFC 3986',
    long_description=readme + '\n\n' + history,
    author='Ian Cordasco',
    author_email='ian.cordasco@rackspace.com',
    url='https://rfc3986.rtfd.org',
    packages=packages,
    package_data={'': ['LICENSE']},
    # Map the package this project actually ships (the previous 'requests'
    # entry was left over from another project's setup.py).
    package_dir={'rfc3986': 'rfc3986'},
    include_package_data=True,
    license='Apache 2.0',
    zip_safe=False,
    # distutils/setuptools expect a list here and warn about a tuple.
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Natural Language :: English',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
    ],
)

# ---------------------------------------------------------------------------
# diff --git a/tests/conftest.py b/tests/conftest.py
# new file mode 100644
# index 0000000..ba44dad
# --- /dev/null
# +++ b/tests/conftest.py
# @@ -0,0 +1,4 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import sys

sys.path.insert(0, '.')

# ---------------------------------------------------------------------------
# diff --git a/tests/test_api.py b/tests/test_api.py
# new file mode 100644
# index 0000000..9e9189a
# --- /dev/null
# +++ b/tests/test_api.py
# @@ -0,0 +1,16 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from rfc3986.api import (
    uri_reference, is_valid_uri, normalize_uri, URIReference
    )


def test_uri_reference():
    assert isinstance(uri_reference('http://example.com'), URIReference)


def test_is_valid_uri():
    assert is_valid_uri('http://example.com') is True


def test_normalize_uri():
    assert normalize_uri('HTTP://EXAMPLE.COM') == 'http://example.com'

# ---------------------------------------------------------------------------
# diff --git a/tests/test_normalizers.py b/tests/test_normalizers.py
# new file mode 100644
# index 0000000..ddcb97b
# --- /dev/null
# +++ b/tests/test_normalizers.py
# @@ -0,0 +1,72 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import pytest

from rfc3986.uri import URIReference
from rfc3986.normalizers import (
    normalize_scheme, normalize_percent_characters, remove_dot_segments
    )


def test_normalize_scheme():
    assert 'http' == normalize_scheme('htTp')
    assert 'http' == normalize_scheme('http')
    assert 'http' == normalize_scheme('HTTP')


def test_normalize_percent_characters():
    expected = '%3Athis_should_be_lowercase%DF%AB%4C'
    assert expected == normalize_percent_characters(
        '%3athis_should_be_lowercase%DF%ab%4c')
    assert expected == normalize_percent_characters(
        '%3Athis_should_be_lowercase%DF%AB%4C')
    assert expected == normalize_percent_characters(
        '%3Athis_should_be_lowercase%DF%aB%4C')


paths = [
    # (Input, expected output)
    ('/foo/bar/.', '/foo/bar/'),
    ('/foo/bar/', '/foo/bar/'),
    ('/foo/bar', '/foo/bar'),
    ('./foo/bar', 'foo/bar'),
    ('/./foo/bar', '/foo/bar'),
    ('/foo%20bar/biz%2Abaz', '/foo%20bar/biz%2Abaz'),
    ('../foo/bar', 'foo/bar'),
    ('/../foo/bar', '/foo/bar'),
    ('a/./b/../b/%63/%7Bfoo%7D', 'a/b/%63/%7Bfoo%7D'),
    ('//a/./b/../b/%63/%7Bfoo%7D', '//a/b/%63/%7Bfoo%7D'),
    ('mid/content=5/../6', 'mid/6'),
    ('/a/b/c/./../../g', '/a/g'),
    ]


@pytest.fixture(params=paths)
def path_fixture(request):
    return request.param


@pytest.fixture(params=paths)
def uris(request):
    to_norm, normalized = request.param
    return (URIReference(None, None, to_norm, None, None),
            URIReference(None, None, normalized, None, None))


def test_remove_dot_segments(path_fixture):
    to_normalize, expected = path_fixture
    assert expected == remove_dot_segments(to_normalize)


def test_normalized_equality(uris):
    assert uris[0] == uris[1]


def test_hostname_normalization():
    assert (URIReference(None, 'EXAMPLE.COM', None, None, None) ==
            URIReference(None, 'example.com', None, None, None))


def test_authority_normalization():
    uri = URIReference(
        None, 'user%2aName@EXAMPLE.COM', None, None, None).normalize()
    assert uri.authority == 'user%2AName@example.com'

# ---------------------------------------------------------------------------
# diff --git a/tests/test_unicode_support.py b/tests/test_unicode_support.py
# new file mode 100644
# index 0000000..0d6ea86
# --- /dev/null
# +++ b/tests/test_unicode_support.py
# @@ -0,0 +1,26 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from rfc3986 import uri_reference


def test_unicode_uri():
    url_bytestring = b'http://example.com?utf8=\xe2\x98\x83'
    unicode_url = url_bytestring.decode('utf-8')
    uri = uri_reference(unicode_url)
    assert uri.is_valid() is True
    assert uri == 'http://example.com?utf8=%E2%98%83'


def test_unicode_uri_passed_as_bytes():
    url_bytestring = b'http://example.com?utf8=\xe2\x98\x83'
    uri = uri_reference(url_bytestring)
    assert uri.is_valid() is True
    assert uri == 'http://example.com?utf8=%E2%98%83'


def test_unicode_authority():
    url_bytestring = b'http://\xe2\x98\x83.com'
    unicode_url = url_bytestring.decode('utf-8')
    uri = uri_reference(unicode_url)
    assert uri.is_valid() is False
    assert uri == unicode_url

# ---------------------------------------------------------------------------
# Patch metadata preserved from the original diff (header continues below):
# diff --git
# a/tests/test_uri.py b/tests/test_uri.py
# new file mode 100644
# index 0000000..8817a54
# --- /dev/null
# +++ b/tests/test_uri.py
# @@ -0,0 +1,408 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import pytest

from rfc3986.exceptions import InvalidAuthority
from rfc3986.misc import URI_MATCHER
from rfc3986.uri import URIReference


valid_hosts = [
    '[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]', '[::1]',
    '[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]', '[FE80::2AA:FF:FE9A:4CA2]',
    '[FF02::2]', '[FF02:3::5]', '[FF02:0:0:0:0:0:0:2]',
    '[FF02:30:0:0:0:0:0:5]', '127.0.0.1', 'www.example.com', 'localhost'
    ]

invalid_hosts = [
    '[FF02::3::5]',  # IPv6 can only have one ::
    '[FADF:01]',  # Not properly compacted (missing a :)
    'localhost:80:80:80'  # Too many ports
    ]


@pytest.fixture(params=valid_hosts)
def basic_uri(request):
    return 'http://%s' % request.param


@pytest.fixture(params=valid_hosts)
def basic_uri_with_port(request):
    return 'ftp://%s:21' % request.param


@pytest.fixture(params=valid_hosts)
def uri_with_port_and_userinfo(request):
    return 'ssh://user:pass@%s:22' % request.param


@pytest.fixture(params=valid_hosts)
def basic_uri_with_path(request):
    return 'http://%s/path/to/resource' % request.param


@pytest.fixture(params=valid_hosts)
def uri_with_path_and_query(request):
    return 'http://%s/path/to/resource?key=value' % request.param


@pytest.fixture(params=valid_hosts)
def uri_with_everything(request):
    return 'https://user:pass@%s:443/path/to/resource?key=value#fragment' % (
        request.param)


@pytest.fixture(params=valid_hosts)
def relative_uri(request):
    return '//%s' % request.param


@pytest.fixture
def absolute_path_uri():
    return '/path/to/file'


@pytest.fixture(params=invalid_hosts)
def invalid_uri(request):
    return 'https://%s' % request.param


@pytest.fixture
def scheme_and_path_uri():
    return 'mailto:user@example.com'


class TestURIReferenceParsesURIs:
    """Tests for URIReference handling of URIs."""
    def test_handles_basic_uri(self, basic_uri):
        """Test that URIReference can handle a simple URI."""
        uri = URIReference.from_string(basic_uri)
        assert uri.scheme == 'http'
        assert uri.authority == basic_uri[len('http://'):]
        assert uri.host == uri.authority
        assert uri.path is None
        assert uri.query is None
        assert uri.fragment is None
        assert uri.port is None
        assert uri.userinfo is None

    def test_handles_basic_uri_with_port(self, basic_uri_with_port):
        """Test that URIReference can handle a simple URI with a port."""
        uri = URIReference.from_string(basic_uri_with_port)
        assert uri.scheme == 'ftp'
        assert uri.authority == basic_uri_with_port[len('ftp://'):]
        assert uri.host != uri.authority
        assert uri.port == '21'
        assert uri.path is None
        assert uri.query is None
        assert uri.fragment is None
        assert uri.userinfo is None

    def test_handles_uri_with_port_and_userinfo(
            self, uri_with_port_and_userinfo):
        """
        Test that URIReference can handle a URI with a port and userinfo.
        """
        uri = URIReference.from_string(uri_with_port_and_userinfo)
        assert uri.scheme == 'ssh'
        # The fixture uses the ssh scheme, so strip 'ssh://' (not 'ftp://')
        assert uri.authority == uri_with_port_and_userinfo[len('ssh://'):]
        assert uri.host != uri.authority
        assert uri.port == '22'
        assert uri.path is None
        assert uri.query is None
        assert uri.fragment is None
        assert uri.userinfo == 'user:pass'

    def test_handles_basic_uri_with_path(self, basic_uri_with_path):
        """Test that URIReference can handle a URI with a path."""
        uri = URIReference.from_string(basic_uri_with_path)
        assert uri.scheme == 'http'
        assert basic_uri_with_path == (uri.scheme + '://' + uri.authority
                                       + uri.path)
        assert uri.host == uri.authority
        assert uri.path == '/path/to/resource'
        assert uri.query is None
        assert uri.fragment is None
        assert uri.userinfo is None
        assert uri.port is None

    def test_handles_uri_with_path_and_query(self, uri_with_path_and_query):
        """
        Test that URIReference can handle a URI with a path and query.
        """
        uri = URIReference.from_string(uri_with_path_and_query)
        assert uri.scheme == 'http'
        assert uri.host == uri.authority
        assert uri.path == '/path/to/resource'
        assert uri.query == 'key=value'
        assert uri.fragment is None
        assert uri.userinfo is None
        assert uri.port is None

    def test_handles_uri_with_everything(self, uri_with_everything):
        """
        Test that URIReference can handle a URI with everything in it.
        """
        uri = URIReference.from_string(uri_with_everything)
        assert uri.scheme == 'https'
        assert uri.path == '/path/to/resource'
        assert uri.query == 'key=value'
        assert uri.fragment == 'fragment'
        assert uri.userinfo == 'user:pass'
        assert uri.port == '443'

    def test_authority_info_raises_InvalidAuthority(self, invalid_uri):
        """Test that an invalid authority is caught by authority_info()."""
        uri = URIReference.from_string(invalid_uri)
        with pytest.raises(InvalidAuthority):
            uri.authority_info()

    def test_attributes_catch_InvalidAuthority(self, invalid_uri):
        """Test that host, userinfo and port are None for a bad authority."""
        uri = URIReference.from_string(invalid_uri)
        assert uri.host is None
        assert uri.userinfo is None
        assert uri.port is None

    def test_handles_relative_uri(self, relative_uri):
        """Test that URIReference can handle a relative URI."""
        uri = URIReference.from_string(relative_uri)
        assert uri.scheme is None
        assert uri.authority == relative_uri[len('//'):]

    def test_handles_absolute_path_uri(self, absolute_path_uri):
        """Test that URIReference can handle a path-only URI."""
        uri = URIReference.from_string(absolute_path_uri)
        assert uri.path == absolute_path_uri
        assert uri.authority_info() == {
            'userinfo': None,
            'host': None,
            'port': None,
        }

    def test_handles_scheme_and_path_uri(self, scheme_and_path_uri):
        """Test that URIReference can handle a `scheme:path` URI."""
        uri = URIReference.from_string(scheme_and_path_uri)
        assert uri.path == 'user@example.com'
        assert uri.scheme == 'mailto'
        assert uri.query is None
        assert uri.host is None
        assert uri.port is None
        assert uri.userinfo is None
        assert uri.authority is None


class TestURIValidation:
    # Valid URI tests
    def test_basic_uri_is_valid(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri.is_valid() is True

    def test_basic_uri_requiring_scheme(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri.is_valid(require_scheme=True) is True

    def test_basic_uri_requiring_authority(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri.is_valid(require_authority=True) is True

    def test_uri_with_everything_requiring_path(self, uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri.is_valid(require_path=True) is True

    def test_uri_with_everything_requiring_query(self, uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri.is_valid(require_query=True) is True

    def test_uri_with_everything_requiring_fragment(self,
                                                    uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri.is_valid(require_fragment=True) is True

    def test_basic_uri_with_port_is_valid(self, basic_uri_with_port):
        uri = URIReference.from_string(basic_uri_with_port)
        assert uri.is_valid() is True

    def test_uri_with_port_and_userinfo_is_valid(self,
                                                 uri_with_port_and_userinfo):
        uri = URIReference.from_string(uri_with_port_and_userinfo)
        assert uri.is_valid() is True

    def test_basic_uri_with_path_is_valid(self, basic_uri_with_path):
        uri = URIReference.from_string(basic_uri_with_path)
        assert uri.is_valid() is True

    def test_uri_with_path_and_query_is_valid(self, uri_with_path_and_query):
        uri = URIReference.from_string(uri_with_path_and_query)
        assert uri.is_valid() is True

    def test_uri_with_everything_is_valid(self, uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri.is_valid() is True

    def test_relative_uri_is_valid(self, relative_uri):
        uri = URIReference.from_string(relative_uri)
        assert uri.is_valid() is True

    def test_absolute_path_uri_is_valid(self, absolute_path_uri):
        uri = URIReference.from_string(absolute_path_uri)
        assert uri.is_valid() is True

    def test_scheme_and_path_uri_is_valid(self, scheme_and_path_uri):
        uri = URIReference.from_string(scheme_and_path_uri)
        assert uri.is_valid() is True

    # Invalid URI tests
    def test_invalid_uri_is_not_valid(self, invalid_uri):
        uri = URIReference.from_string(invalid_uri)
        assert uri.is_valid() is False

    def test_invalid_scheme(self):
        uri = URIReference('123', None, None, None, None)
        assert uri.is_valid() is False

    def test_invalid_path(self):
        uri = URIReference(None, None, 'foo#bar', None, None)
        assert uri.is_valid() is False

    def test_invalid_query_component(self):
        uri = URIReference(None, None, None, 'foo#bar', None)
        assert uri.is_valid() is False

    def test_invalid_fragment_component(self):
        uri = URIReference(None, None, None, None, 'foo#bar')
        assert uri.is_valid() is False


class TestURIReferenceUnsplits:
    def test_basic_uri_unsplits(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri.unsplit() == basic_uri

    def test_basic_uri_with_port_unsplits(self, basic_uri_with_port):
        uri = URIReference.from_string(basic_uri_with_port)
        assert uri.unsplit() == basic_uri_with_port

    def test_uri_with_port_and_userinfo_unsplits(self,
                                                 uri_with_port_and_userinfo):
        uri = URIReference.from_string(uri_with_port_and_userinfo)
        assert uri.unsplit() == uri_with_port_and_userinfo

    def test_basic_uri_with_path_unsplits(self, basic_uri_with_path):
        uri = URIReference.from_string(basic_uri_with_path)
        assert uri.unsplit() == basic_uri_with_path

    def test_uri_with_path_and_query_unsplits(self, uri_with_path_and_query):
        uri = URIReference.from_string(uri_with_path_and_query)
        assert uri.unsplit() == uri_with_path_and_query

    def test_uri_with_everything_unsplits(self, uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri.unsplit() == uri_with_everything

    def test_relative_uri_unsplits(self, relative_uri):
        uri = URIReference.from_string(relative_uri)
        assert uri.unsplit() == relative_uri

    def test_absolute_path_uri_unsplits(self, absolute_path_uri):
        uri = URIReference.from_string(absolute_path_uri)
        assert uri.unsplit() == absolute_path_uri

    def test_scheme_and_path_uri_unsplits(self, scheme_and_path_uri):
        uri = URIReference.from_string(scheme_and_path_uri)
        assert uri.unsplit() == scheme_and_path_uri


class TestURIReferenceComparesToStrings:
    def test_basic_uri(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri == basic_uri

    def test_basic_uri_with_port(self, basic_uri_with_port):
        uri = URIReference.from_string(basic_uri_with_port)
        assert uri == basic_uri_with_port

    def test_uri_with_port_and_userinfo(self, uri_with_port_and_userinfo):
        uri = URIReference.from_string(uri_with_port_and_userinfo)
        assert uri == uri_with_port_and_userinfo

    def test_basic_uri_with_path(self, basic_uri_with_path):
        uri = URIReference.from_string(basic_uri_with_path)
        assert uri == basic_uri_with_path

    def test_uri_with_path_and_query(self, uri_with_path_and_query):
        uri = URIReference.from_string(uri_with_path_and_query)
        assert uri == uri_with_path_and_query

    def test_uri_with_everything(self, uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri == uri_with_everything

    def test_relative_uri(self, relative_uri):
        uri = URIReference.from_string(relative_uri)
        assert uri == relative_uri

    def test_absolute_path_uri(self, absolute_path_uri):
        uri = URIReference.from_string(absolute_path_uri)
        assert uri == absolute_path_uri

    def test_scheme_and_path_uri(self, scheme_and_path_uri):
        uri = URIReference.from_string(scheme_and_path_uri)
        assert uri == scheme_and_path_uri


class TestURIReferenceComparesToTuples:
    def to_tuple(self, uri):
        """Split ``uri`` with URI_MATCHER and return the matched groups."""
        return URI_MATCHER.match(uri).groups()

    def test_basic_uri(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri == self.to_tuple(basic_uri)

    def test_basic_uri_with_port(self, basic_uri_with_port):
        uri = URIReference.from_string(basic_uri_with_port)
        assert uri == self.to_tuple(basic_uri_with_port)

    def test_uri_with_port_and_userinfo(self, uri_with_port_and_userinfo):
        uri = URIReference.from_string(uri_with_port_and_userinfo)
        assert uri == self.to_tuple(uri_with_port_and_userinfo)

    def test_basic_uri_with_path(self, basic_uri_with_path):
        uri = URIReference.from_string(basic_uri_with_path)
        assert uri == self.to_tuple(basic_uri_with_path)

    def test_uri_with_path_and_query(self, uri_with_path_and_query):
        uri = URIReference.from_string(uri_with_path_and_query)
        assert uri == self.to_tuple(uri_with_path_and_query)

    def test_uri_with_everything(self, uri_with_everything):
        uri = URIReference.from_string(uri_with_everything)
        assert uri == self.to_tuple(uri_with_everything)

    def test_relative_uri(self, relative_uri):
        uri = URIReference.from_string(relative_uri)
        assert uri == self.to_tuple(relative_uri)

    def test_absolute_path_uri(self, absolute_path_uri):
        uri = URIReference.from_string(absolute_path_uri)
        assert uri == self.to_tuple(absolute_path_uri)

    def test_scheme_and_path_uri(self, scheme_and_path_uri):
        uri = URIReference.from_string(scheme_and_path_uri)
        assert uri == self.to_tuple(scheme_and_path_uri)


def test_uri_comparison_raises_TypeError(basic_uri):
    uri = URIReference.from_string(basic_uri)
    with pytest.raises(TypeError):
        # The comparison itself is the behaviour under test; its result is
        # intentionally discarded.
        uri == 1


class TestURIReferenceComparesToURIReferences:
    def test_same_basic_uri(self, basic_uri):
        uri = URIReference.from_string(basic_uri)
        assert uri == uri

    def test_different_basic_uris(self, basic_uri, basic_uri_with_port):
        uri = URIReference.from_string(basic_uri)
        assert (uri == URIReference.from_string(basic_uri_with_port)) is False

# ---------------------------------------------------------------------------
# Patch trailer preserved from the original diff:
# --
# cgit v1.2.1