7 files changed, 719 insertions, 0 deletions
diff --git a/rfc3986/__init__.py b/rfc3986/__init__.py
new file mode 100644
index 0000000..57255d9
--- /dev/null
+++ b/rfc3986/__init__.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2014 Rackspace
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+rfc3986
+=======
+
+An implementation of semantics and validations described in RFC 3986. See
+http://rfc3986.rtfd.org/ for documentation.
+
+:copyright: (c) 2014 Rackspace
+:license: Apache v2.0, see LICENSE for details
+"""
+
+# Package metadata, exposed as module-level dunder attributes.
+__title__ = 'rfc3986'
+__author__ = 'Ian Cordasco'
+__author_email__ = 'ian.cordasco@rackspace.com'
+__license__ = 'Apache v2.0'
+__copyright__ = 'Copyright 2014 Rackspace'
+__version__ = '0.2.0'
+
+# Re-export the simple API so it can be imported straight from the package.
+from .api import (
+    URIReference,
+    is_valid_uri,
+    normalize_uri,
+    uri_reference,
+)
+
+# Names exported by ``from rfc3986 import *``.
+__all__ = [
+    'URIReference',
+    'uri_reference',
+    'is_valid_uri',
+    'normalize_uri',
+]
diff --git a/rfc3986/api.py b/rfc3986/api.py
new file mode 100644
index 0000000..c993bb8
--- /dev/null
+++ b/rfc3986/api.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2014 Rackspace
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+rfc3986.api
+~~~~~~~~~~~
+
+This defines the simple API to rfc3986. This module defines 3 functions and
+provides access to the class ``URIReference``.
+"""
+
+from .uri import URIReference
+
+
+def uri_reference(uri, encoding='utf-8'):
+    """Build a :class:`URIReference` from a URI string.
+
+    This is a thin shortcut for ``URIReference.from_string(uri, encoding)``.
+
+    :param str uri: The URI to parse into a reference.
+    :param str encoding: The encoding of the string provided
+    :returns: The parsed reference
+    :rtype: :class:`URIReference`
+    """
+    reference = URIReference.from_string(uri, encoding)
+    return reference
+
+
+def is_valid_uri(uri, encoding='utf-8', **kwargs):
+    """Check whether the given URI is valid.
+
+    This is a thin shortcut for parsing the string with
+    ``URIReference.from_string(uri, encoding)`` and calling ``is_valid`` on
+    the result; all keyword arguments are forwarded to ``is_valid``.
+
+    :param str uri: The URI to be validated.
+    :param str encoding: The encoding of the string provided
+    :param bool require_scheme: Set to ``True`` to require the presence of
+        the scheme component.
+    :param bool require_authority: Set to ``True`` to require the presence of
+        the authority component.
+    :param bool require_path: Set to ``True`` to require the presence of the
+        path component.
+    :param bool require_query: Set to ``True`` to require the presence of the
+        query component.
+    :param bool require_fragment: Set to ``True`` to require the presence of
+        the fragment component.
+    :returns: ``True`` if the URI is valid, ``False`` otherwise.
+    :rtype: bool
+    """
+    reference = URIReference.from_string(uri, encoding)
+    return reference.is_valid(**kwargs)
+
+
+def normalize_uri(uri, encoding='utf-8'):
+    """Return the normalized form of the given URI as a string.
+
+    This is a thin shortcut: the string is parsed with
+    ``URIReference.from_string``, normalized with ``normalize()`` and put
+    back together with ``unsplit()``.
+
+    :param str uri: The URI to be normalized.
+    :param str encoding: The encoding of the string provided
+    :returns: The normalized URI.
+    :rtype: str
+    """
+    return URIReference.from_string(uri, encoding).normalize().unsplit()
diff --git a/rfc3986/compat.py b/rfc3986/compat.py
new file mode 100644
index 0000000..6fc7f6d
--- /dev/null
+++ b/rfc3986/compat.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2014 Rackspace
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+
+if sys.version_info >= (3, 0):
+    unicode = str  # Python 3.x has no separate ``unicode`` builtin
+
+
+def to_str(b, encoding):
+    """Return ``b`` as text, decoding it with ``encoding`` when needed.
+
+    :param b: The value to convert. Anything that has a ``decode`` method and
+        is not already text is decoded; every other value is returned as is.
+    :param str encoding: The encoding used to decode ``b``.
+    :returns: The decoded text, or ``b`` itself when no decoding was needed.
+    """
+    if hasattr(b, 'decode') and not isinstance(b, unicode):
+        # Honor the caller's encoding instead of always assuming UTF-8.
+        b = b.decode(encoding)
+    return b
+
+
+def to_bytes(s, encoding):
+    """Return ``s`` as bytes, encoding it with ``encoding`` when needed.
+
+    :param s: The value to convert. Anything that has an ``encode`` method
+        and is not already ``bytes`` is encoded; every other value is
+        returned as is.
+    :param str encoding: The encoding used to encode ``s``.
+    :returns: The encoded bytes, or ``s`` itself when no encoding was needed.
+    """
+    if hasattr(s, 'encode') and not isinstance(s, bytes):
+        # Honor the caller's encoding instead of always assuming UTF-8.
+        s = s.encode(encoding)
+    return s
diff --git a/rfc3986/exceptions.py b/rfc3986/exceptions.py
new file mode 100644
index 0000000..19ad191
--- /dev/null
+++ b/rfc3986/exceptions.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+class RFC3986Exception(Exception):
+    """Base class for the exceptions defined in this module."""
+
+
+class InvalidAuthority(RFC3986Exception):
+    """Error reporting an authority component that is not valid."""
+
+    def __init__(self, authority):
+        # The offending authority is embedded in the exception message.
+        message = "The authority ({0}) is not valid.".format(authority)
+        super(InvalidAuthority, self).__init__(message)
diff --git a/rfc3986/misc.py b/rfc3986/misc.py
new file mode 100644
index 0000000..d6205f6
--- /dev/null
+++ b/rfc3986/misc.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2014 Rackspace
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+rfc3986.misc
+~~~~~~~~~~~~
+
+This module contains important constants, patterns, and compiled regular
+expressions for parsing and validating URIs and their components.
+"""
+
+import re
+
+# These are enumerated for the named tuple used as a superclass of
+# URIReference
+URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment']
+
+# Character groups from RFC 3986. The ``re_*`` entries hold the same
+# characters escaped so they can be dropped into a regular expression
+# character class; they are raw strings so the backslashes are literal.
+important_characters = {
+    'generic_delimiters': ":/?#[]@",
+    'sub_delimiters': "!$&'()*+,;=",
+    # We need to escape the '*' in this case
+    're_sub_delimiters': r"!$&'()\*+,;=",
+    # ALPHA / DIGIT / "-" / "." / "_" / "~" -- all ten digits are required,
+    # otherwise the missing digit would be percent-encoded needlessly.
+    'unreserved_chars': ('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
+                         '0123456789._~-'),
+    # We need to escape the '-' in this case:
+    're_unreserved': r'A-Za-z0-9._~\-',
+    }
+# For details about delimiters and reserved characters, see:
+# http://tools.ietf.org/html/rfc3986#section-2.2
+GENERIC_DELIMITERS = set(important_characters['generic_delimiters'])
+SUB_DELIMITERS = set(important_characters['sub_delimiters'])
+RESERVED_CHARS = GENERIC_DELIMITERS.union(SUB_DELIMITERS)
+# For details about unreserved characters, see:
+# http://tools.ietf.org/html/rfc3986#section-2.3
+UNRESERVED_CHARS = set(important_characters['unreserved_chars'])
+# Characters that may appear in a component without being percent-encoded;
+# '%' is included so already-encoded octets are left alone.
+NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%')
+
+# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B
+component_pattern_dict = dict(
+    scheme='[^:/?#]+',
+    authority='[^/?#]*',
+    path='[^?#]*',
+    query='[^#]*',
+    fragment='.*',
+    )
+
+# See http://tools.ietf.org/html/rfc3986#appendix-B
+# Every component gets a named group, so the pieces can be read back with
+# SRE_Match#groupdict. The delimiters live in non-capturing groups, which
+# means SRE_Match#groups yields exactly the five components as well.
+expression = ''.join([
+    '(?:(?P<scheme>{scheme}):)?',
+    '(?://(?P<authority>{authority}))?',
+    '(?P<path>{path})',
+    r'(?:\?(?P<query>{query}))?',
+    '(?:#(?P<fragment>{fragment}))?',
+    ]).format(**component_pattern_dict)
+
+URI_MATCHER = re.compile(expression)
+
+# #########################
+# Authority Matcher Section
+# #########################
+
+# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
+# The pattern for a regular name, e.g., www.google.com, api.github.com
+# reg-name = *( unreserved / pct-encoded / sub-delims ). "-" and "~" are
+# unreserved, so hosts such as my-host.example.com have to be accepted.
+reg_name = r"(?:[\w\d.~\-!$&'()*+,;=]|%[0-9A-Fa-f]{2})+"
+# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1
+# The dot is escaped: a bare "." would accept any separator character.
+ipv4 = r'(\d{1,3}\.){3}\d{1,3}'
+# Hexadecimal characters used in each piece of an IPv6 address
+hexdig = '[0-9A-Fa-f]{1,4}'
+# Least-significant 32 bits of an IPv6 address
+ls32 = '({hex}:{hex}|{ipv4})'.format(hex=hexdig, ipv4=ipv4)
+# Substitutions into the following patterns for IPv6 patterns defined
+# http://tools.ietf.org/html/rfc3986#page-20
+subs = {'hex': hexdig, 'ls32': ls32}
+
+# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
+# about ABNF (Augmented Backus-Naur Form) use in the comments
+variations = [
+    #                            6( h16 ":" ) ls32
+    '(%(hex)s:){6}%(ls32)s' % subs,
+    #                       "::" 5( h16 ":" ) ls32
+    '::(%(hex)s:){5}%(ls32)s' % subs,
+    # [               h16 ] "::" 4( h16 ":" ) ls32
+    '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % subs,
+    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+    '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % subs,
+    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+    '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % subs,
+    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
+    '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % subs,
+    # [ *4( h16 ":" ) h16 ] "::"              ls32
+    '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % subs,
+    # [ *5( h16 ":" ) h16 ] "::"              h16
+    '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % subs,
+    # [ *6( h16 ":" ) h16 ] "::"
+    '((%(hex)s:){0,6}%(hex)s)?::' % subs,
+    ]
+
+# Join every variation rather than numbering them by hand, so none of them
+# (in particular the last one, which covers "::" and "1::") is left out.
+ipv6 = '(' + '|'.join('({0})'.format(v) for v in variations) + ')'
+
+# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+# The "." is a literal dot and therefore has to be escaped.
+ipv_future = r'v[0-9A-Fa-f]+\.[%s]+' % (
+    important_characters['re_unreserved'] +
+    important_characters['re_sub_delimiters'] +
+    ':')
+
+# IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ip_literal = r'\[({0}|{1})\]'.format(ipv6, ipv_future)
+
+# Pattern for matching the host piece of the authority
+HOST_PATTERN = '({0}|{1}|{2})'.format(reg_name, ipv4, ip_literal)
+
+# authority = [ userinfo "@" ] host [ ":" port ]
+# The port digits are only accepted after a ":" so that an authority such
+# as "[::1]80" is not split into a host and a port.
+SUBAUTHORITY_MATCHER = re.compile((
+    r'^(?:(?P<userinfo>[A-Za-z0-9_.~\-%:]+)@)?'  # userinfo
+    '(?P<host>{0}?)'  # host
+    r'(?::(?P<port>\d+)?)?$'  # port
+    ).format(HOST_PATTERN))
+
+
+# ####################
+# Path Matcher Section
+# ####################
+
+# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
+# about the path patterns defined below.
+
+# Percent encoded character values
+pct_encoded = '%[A-Fa-f0-9]{2}'
+# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+pchar = '([{0}{1}:@]|{2})'.format(
+    important_characters['re_unreserved'],
+    important_characters['re_sub_delimiters'],
+    pct_encoded)
+segments = {
+    # Possibly empty segment
+    'segment': '{0}*'.format(pchar),
+    # Segment with at least one character
+    'segment-nz': '{0}+'.format(pchar),
+    # Segment with at least one character and no ":" in it
+    'segment-nz-nc': '{0}+'.format(pchar.replace(':', '')),
+    }
+
+# The five kinds of path listed in Section 3.3 (linked above)
+path_empty = '^$'
+path_rootless = '{0}(/{1})*'.format(segments['segment-nz'],
+                                    segments['segment'])
+path_noscheme = '{0}(/{1})*'.format(segments['segment-nz-nc'],
+                                    segments['segment'])
+path_absolute = '/({0})?'.format(path_rootless)
+path_abempty = '(/{0})*'.format(segments['segment'])
+
+# Matcher used to validate path components: any one of the path kinds
+PATH_MATCHER = re.compile('^({0})$'.format('|'.join([
+    path_abempty, path_absolute, path_noscheme, path_rootless, path_empty,
+    ])))
+
+
+# ##################################
+# Query and Fragment Matcher Section
+# ##################################
+
+# query = *( pchar / "/" / "?" ), anchored so it validates a whole component
+QUERY_MATCHER = re.compile('^([/?:@{0}{1}]|{2})*$'.format(
+    important_characters['re_unreserved'],
+    important_characters['re_sub_delimiters'],
+    pct_encoded))
+
+# A fragment is validated with exactly the same matcher as a query.
+FRAGMENT_MATCHER = QUERY_MATCHER
+
+# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
+# A letter followed by any number of letters, digits, "+", "." or "-".
+SCHEME_MATCHER = re.compile(r'^[A-Za-z][A-Za-z0-9+.\-]*$')
+
+# Relative reference matcher
+
+# See http://tools.ietf.org/html/rfc3986#section-4.2 for details
+# QUERY_MATCHER validates a whole component, so its pattern starts with "^"
+# and ends with "$". Those anchors can never match in the middle of a larger
+# expression, so they are stripped before the pattern is embedded below.
+_query_or_fragment = QUERY_MATCHER.pattern[1:-1]
+
+# relative-part = "//" authority path-abempty / path-absolute
+#               / path-noscheme / path-empty
+# The empty path is written as an empty alternative: the anchored
+# ``path_empty`` pattern would forbid a query or fragment after it.
+relative_part = '(//%s%s|%s|%s|)' % (
+    component_pattern_dict['authority'], path_abempty, path_absolute,
+    path_noscheme
+    )
+
+# relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
+    relative_part, _query_or_fragment, _query_or_fragment
+    ))
+
+# See http://tools.ietf.org/html/rfc3986#section-3 for definition
+# hier-part = "//" authority path-abempty / path-absolute
+#           / path-rootless / path-empty
+hier_part = '(//%s%s|%s|%s|)' % (
+    component_pattern_dict['authority'], path_abempty, path_absolute,
+    path_rootless
+    )
+
+# See http://tools.ietf.org/html/rfc3986#section-4.3
+# absolute-URI = scheme ":" hier-part [ "?" query ] -- the query is optional
+ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
+    component_pattern_dict['scheme'], hier_part, _query_or_fragment
+    ))
diff --git a/rfc3986/normalizers.py b/rfc3986/normalizers.py
new file mode 100644
index 0000000..d232093
--- /dev/null
+++ b/rfc3986/normalizers.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2014 Rackspace
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+
+from .compat import to_bytes
+from .misc import NON_PCT_ENCODED
+
+
+def normalize_scheme(scheme):
+    """Return ``scheme`` converted to lowercase."""
+    lowered = scheme.lower()
+    return lowered
+
+
+def normalize_authority(authority):
+    """Build a normalized authority string from its three parts.
+
+    :param tuple authority: ``(userinfo, host, port)``. Any item that is
+        ``None`` or empty is left out of the result.
+    :returns: The recombined authority, ``userinfo@host:port`` with the
+        missing parts (and their delimiters) omitted.
+    :rtype: str
+    """
+    userinfo, host, port = authority
+    result = ''
+    if userinfo:
+        result += normalize_percent_characters(userinfo) + '@'
+    if host:
+        # The host is case-insensitive and is lower-cased, but the hex
+        # digits of percent-encoded octets are upper-cased by normalization
+        # (RFC 3986, Section 6.2.2.1), so restore them after lower-casing.
+        result += normalize_percent_characters(host.lower())
+    if port:
+        result += ':' + port
+    return result
+
+
+def normalize_path(path):
+    """Normalize the percent-encodings of ``path``, then drop dot segments."""
+    return remove_dot_segments(normalize_percent_characters(path))
+
+
+def normalize_query(query):
+    """Return ``query`` with its percent-encodings normalized."""
+    normalized = normalize_percent_characters(query)
+    return normalized
+
+
+def normalize_fragment(fragment):
+    """Return ``fragment`` with its percent-encodings normalized."""
+    normalized = normalize_percent_characters(fragment)
+    return normalized
+
+
+PERCENT_MATCHER = re.compile('%[A-Fa-f0-9]{2}')
+
+
+def normalize_percent_characters(s):
+    """Upper-case the hex digits of every percent-encoded octet in ``s``.
+
+    For example, ``"%3afoo%DF%ab"`` becomes ``"%3Afoo%DF%AB"``. Text that is
+    not part of a ``%XX`` triplet is returned unchanged.
+    """
+    # One substitution pass: each matched triplet is replaced by its
+    # upper-cased form (already upper-cased triplets map to themselves).
+    return PERCENT_MATCHER.sub(lambda match: match.group(0).upper(), s)
+
+
+def remove_dot_segments(s):
+    """Resolve the ``.`` and ``..`` segments of the path ``s``.
+
+    See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code.
+
+    :param str s: The path to clean up.
+    :returns: The path with its dot segments removed.
+    :rtype: str
+    """
+    kept = []  # Segments that survive, in order
+
+    for piece in s.split('/'):
+        if piece == '..':
+            # Go up one level by dropping the previous segment, if any
+            if kept:
+                kept.pop()
+        elif piece != '.':
+            # '.' is the current directory and is simply skipped; every
+            # other segment is kept
+            kept.append(piece)
+
+    # A path that starts with '/' must keep its leading '/': put the empty
+    # first segment back unless it is still there
+    if s.startswith('/') and not (kept and kept[0] == ''):
+        kept = [''] + kept
+
+    # A path that ends with '/.' or '/..' gets one more empty segment, which
+    # produces a trailing '/'
+    if s.endswith('/.') or s.endswith('/..'):
+        kept.append('')
+
+    return '/'.join(kept)
+
+
+def encode_component(uri_component, encoding):
+    """Percent-encode the bytes of ``uri_component`` that require it.
+
+    The component is converted to bytes with ``to_bytes``. Every byte below
+    128 whose character is in ``NON_PCT_ENCODED`` is copied through; every
+    other byte is written as ``%xx``.
+
+    :param uri_component: The component to encode, or ``None``.
+    :param str encoding: The encoding handed to ``to_bytes``.
+    :returns: The encoded component as text, or ``None`` if the component
+        was ``None``.
+    """
+    if uri_component is None:
+        return uri_component
+
+    uri_bytes = to_bytes(uri_component, encoding)
+
+    encoded_uri = bytearray()
+
+    for i in range(len(uri_bytes)):
+        # Slicing returns a single character bytestring on both Python 2 & 3
+        byte = uri_bytes[i:i+1]
+        byte_ord = ord(byte)
+        if byte_ord < 128 and byte.decode() in NON_PCT_ENCODED:
+            encoded_uri.extend(byte)
+        else:
+            encoded_uri.extend('%{0:02x}'.format(byte_ord).encode())
+
+    # Only bytes below 128 and ASCII "%xx" triplets were collected, so the
+    # result is decoded as ASCII. Decoding it with ``encoding`` would fail or
+    # produce garbage for encodings that are not ASCII-compatible (UTF-16).
+    return encoded_uri.decode('ascii')
diff --git a/rfc3986/uri.py b/rfc3986/uri.py
new file mode 100644
index 0000000..8df4cda
--- /dev/null
+++ b/rfc3986/uri.py
@@ -0,0 +1,256 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2014 Rackspace
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from collections import namedtuple
+
+from .compat import to_str
+from .exceptions import InvalidAuthority
+from .misc import (
+    FRAGMENT_MATCHER, PATH_MATCHER, QUERY_MATCHER, SCHEME_MATCHER,
+    SUBAUTHORITY_MATCHER, URI_MATCHER, URI_COMPONENTS
+    )
+from .normalizers import (
+    encode_component, normalize_scheme, normalize_authority, normalize_path,
+    normalize_query, normalize_fragment
+    )
+
+
+class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
+    """A URI reference split into its five components.
+
+    The fields are the names listed in ``URI_COMPONENTS``. Components that
+    are empty are stored as ``None``.
+    """
+
+    # No per-instance ``__dict__``: instances stay as light as plain tuples.
+    __slots__ = ()
+
+    def __new__(cls, scheme, authority, path, query, fragment):
+        # Every falsy component (e.g. '') is stored as None.
+        return super(URIReference, cls).__new__(
+            cls,
+            scheme or None,
+            authority or None,
+            path or None,
+            query or None,
+            fragment or None)
+
+    def __eq__(self, other):
+        """Compare this reference to a reference, a tuple or a URI string.
+
+        Two references are equal when their components are identical or
+        when they are identical after normalization.
+
+        :raises TypeError: If ``other`` cannot be parsed into a reference.
+        """
+        other_ref = other
+        if isinstance(other, tuple):
+            other_ref = URIReference(*other)
+        elif not isinstance(other, URIReference):
+            try:
+                other_ref = URIReference.from_string(other)
+            except TypeError:
+                raise TypeError(
+                    'Unable to compare URIReference() to {0}()'.format(
+                        type(other).__name__))
+
+        # See http://tools.ietf.org/html/rfc3986#section-6.2
+        naive_equality = tuple(self) == tuple(other_ref)
+        return naive_equality or self.normalized_equality(other_ref)
+
+    def __ne__(self, other):
+        """Return the opposite of ``==``.
+
+        Without this method ``!=`` would be answered by ``tuple.__ne__``,
+        which compares the raw components and so can disagree with the
+        normalizing ``__eq__``.
+        """
+        return not self.__eq__(other)
+
+    @classmethod
+    def from_string(cls, uri_string, encoding='utf-8'):
+        """Parse a URI reference from the given unicode URI string.
+
+        :param str uri_string: Unicode URI to be parsed into a reference.
+        :param str encoding: The encoding of the string provided
+        :returns: :class:`URIReference` or subclass thereof
+        """
+        uri_string = to_str(uri_string, encoding)
+
+        split_uri = URI_MATCHER.match(uri_string).groupdict()
+        # Build the result through ``cls`` so that a subclass calling this
+        # constructor gets an instance of itself back.
+        return cls(split_uri['scheme'], split_uri['authority'],
+                   encode_component(split_uri['path'], encoding),
+                   encode_component(split_uri['query'], encoding),
+                   encode_component(split_uri['fragment'], encoding))
+
+    def authority_info(self):
+        """Returns a dictionary with the ``userinfo``, ``host``, and ``port``.
+
+        If the authority is not valid, it will raise a ``InvalidAuthority``
+        Exception.
+
+        :returns:
+            ``{'userinfo': 'username:password', 'host': 'www.example.com',
+            'port': '80'}``
+        :rtype: dict
+        :raises InvalidAuthority: If the authority is not ``None`` and can not
+            be parsed.
+        """
+        if not self.authority:
+            return {'userinfo': None, 'host': None, 'port': None}
+
+        match = SUBAUTHORITY_MATCHER.match(self.authority)
+
+        if match is None:
+            # In this case, we have an authority that was parsed from the URI
+            # Reference, but it cannot be further parsed by our
+            # SUBAUTHORITY_MATCHER. In this case it must not be a valid
+            # authority.
+            raise InvalidAuthority(self.authority)
+
+        return match.groupdict()
+
+    def _authority_component(self, name):
+        # Shared lookup behind the host/port/userinfo properties: an
+        # authority that cannot be parsed yields None instead of raising.
+        try:
+            authority = self.authority_info()
+        except InvalidAuthority:
+            return None
+        return authority[name]
+
+    @property
+    def host(self):
+        """If present, a string representing the host."""
+        return self._authority_component('host')
+
+    @property
+    def port(self):
+        """If present, the port (as a string) extracted from the authority."""
+        return self._authority_component('port')
+
+    @property
+    def userinfo(self):
+        """If present, the userinfo extracted from the authority."""
+        return self._authority_component('userinfo')
+
+    def is_valid(self, **kwargs):
+        """Determines if the URI is valid.
+
+        :param bool require_scheme: Set to ``True`` if you wish to require the
+            presence of the scheme component.
+        :param bool require_authority: Set to ``True`` if you wish to require
+            the presence of the authority component.
+        :param bool require_path: Set to ``True`` if you wish to require the
+            presence of the path component.
+        :param bool require_query: Set to ``True`` if you wish to require the
+            presence of the query component.
+        :param bool require_fragment: Set to ``True`` if you wish to require
+            the presence of the fragment component.
+        :returns: ``True`` if the URI is valid. ``False`` otherwise.
+        :rtype: bool
+        """
+        # Each validator is paired with the "require" flag it is called with.
+        validators = [
+            (self.scheme_is_valid, kwargs.get('require_scheme', False)),
+            (self.authority_is_valid, kwargs.get('require_authority', False)),
+            (self.path_is_valid, kwargs.get('require_path', False)),
+            (self.query_is_valid, kwargs.get('require_query', False)),
+            (self.fragment_is_valid, kwargs.get('require_fragment', False)),
+            ]
+        return all(v(r) for v, r in validators)
+
+    def _is_valid(self, value, matcher, require):
+        # A missing component is acceptable only when it is not required.
+        if value is None:
+            return not require
+
+        # Return a real bool, as documented, rather than the match object.
+        return matcher.match(value) is not None
+
+    def authority_is_valid(self, require=False):
+        """Determines if the authority component is valid.
+
+        :param bool require: Set to ``True`` to require the presence of this
+            component.
+        :returns: ``True`` if the authority is valid. ``False`` otherwise.
+        :rtype: bool
+        """
+        return self._is_valid(self.authority, SUBAUTHORITY_MATCHER, require)
+
+    def scheme_is_valid(self, require=False):
+        """Determines if the scheme component is valid.
+
+        :param bool require: Set to ``True`` to require the presence of this
+            component.
+        :returns: ``True`` if the scheme is valid. ``False`` otherwise.
+        :rtype: bool
+        """
+        return self._is_valid(self.scheme, SCHEME_MATCHER, require)
+
+    def path_is_valid(self, require=False):
+        """Determines if the path component is valid.
+
+        :param bool require: Set to ``True`` to require the presence of this
+            component.
+        :returns: ``True`` if the path is valid. ``False`` otherwise.
+        :rtype: bool
+        """
+        return self._is_valid(self.path, PATH_MATCHER, require)
+
+    def query_is_valid(self, require=False):
+        """Determines if the query component is valid.
+
+        :param bool require: Set to ``True`` to require the presence of this
+            component.
+        :returns: ``True`` if the query is valid. ``False`` otherwise.
+        :rtype: bool
+        """
+        return self._is_valid(self.query, QUERY_MATCHER, require)
+
+    def fragment_is_valid(self, require=False):
+        """Determines if the fragment component is valid.
+
+        :param bool require: Set to ``True`` to require the presence of this
+            component.
+        :returns: ``True`` if the fragment is valid. ``False`` otherwise.
+        :rtype: bool
+        """
+        return self._is_valid(self.fragment, FRAGMENT_MATCHER, require)
+
+    def normalize(self):
+        """Normalize this reference as described in Section 6.2.2
+
+        This is not an in-place normalization. Instead this creates a new
+        URIReference.
+
+        :returns: A new reference object with normalized components.
+        :rtype: URIReference
+        """
+        # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in
+        # this method.
+        # Missing components are passed to the normalizers as '' and are
+        # turned back into None by the constructor.
+        return URIReference(normalize_scheme(self.scheme or ''),
+                            normalize_authority(
+                                (self.userinfo, self.host, self.port)),
+                            normalize_path(self.path or ''),
+                            normalize_query(self.query or ''),
+                            normalize_fragment(self.fragment or ''))
+
+    def normalized_equality(self, other_ref):
+        """Compare this URIReference to another URIReference.
+
+        :param URIReference other_ref: (required), The reference with which
+            we're comparing.
+        :returns: ``True`` if the references are equal, ``False`` otherwise.
+        :rtype: bool
+        """
+        return tuple(self.normalize()) == tuple(other_ref.normalize())
+
+    def unsplit(self):
+        """Create a URI string from the components.
+
+        :returns: The URI Reference reconstituted as a string.
+        :rtype: str
+        """
+        # See http://tools.ietf.org/html/rfc3986#section-5.3
+        # Each component that is present is emitted with its delimiter.
+        result_list = []
+        if self.scheme:
+            result_list.extend([self.scheme, ':'])
+        if self.authority:
+            result_list.extend(['//', self.authority])
+        if self.path:
+            result_list.append(self.path)
+        if self.query:
+            result_list.extend(['?', self.query])
+        if self.fragment:
+            result_list.extend(['#', self.fragment])
+        return ''.join(result_list)