"""URI normalization utilities."""
from __future__ import unicode_literals

import string

import six
from six.moves.urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit

from .oidutil import string_to_text


def remove_dot_segments(path):
    """Return `path` with "." and ".." segments resolved.

    The input is consumed from the left, following the steps of RFC 3986, section 5.2.4.
    Complete segments are collected in an output buffer and a ".." segment discards the last collected one.

    @param path: URI path
    @rtype: Same string type as `path`
    """
    output = []
    remaining = path

    while remaining:
        # Leading "../" or "./" - drop the prefix including its slash.
        if remaining.startswith(('../', './')):
            remaining = remaining.partition('/')[2]
            continue

        # "/./" prefix or "/." alone - collapse to a single "/".
        if remaining.startswith('/./') or remaining == '/.':
            remaining = '/' + remaining[3:]
            continue

        # "/../" prefix or "/.." alone - collapse to "/" and step back one segment.
        if remaining.startswith('/../') or remaining == '/..':
            remaining = '/' + remaining[4:]
            if output:
                output.pop()
            continue

        # Nothing but a dot segment is left.
        if remaining in ('.', '..'):
            remaining = ''
            continue

        # Move the first segment, including its leading "/" if any, to the output.
        search_from = 1 if remaining[0] == '/' else 0
        cut = remaining.find('/', search_from)
        if cut < 0:
            cut = len(remaining)
        output.append(remaining[:cut])
        remaining = remaining[cut:]

    return ''.join(output)


# Character sets named after the RFC 3986 rules `gen-delims`, `sub-delims`, `reserved` and `unreserved`.
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = GEN_DELIMS + SUB_DELIMS
UNRESERVED = string.ascii_letters + string.digits + "-._~"
# The character which introduces a percent encoded octet.
PERCENT_ENCODING_CHARACTER = "%"


def _check_disallowed_characters(uri_part, part_name):
    """Raise ValueError if `uri_part` contains a character outside of the allowed set.

    The check is rough - it is not strict according to the URI ABNF, but good enough.
    Every unreserved and reserved character is accepted in any part, as well as "%" for percent encoding.

    @param uri_part: Content of the URI part to check
    @param part_name: Name of the URI part, used in the error message
    @raise ValueError: If a disallowed character is found.
    """
    allowed_characters = set(UNRESERVED + RESERVED + PERCENT_ENCODING_CHARACTER)
    if not allowed_characters.issuperset(uri_part):
        raise ValueError('Illegal characters in URI {}: {}'.format(part_name, uri_part))


def _requote(value, safe):
    """Normalize percent encoding of a URI component by decoding it and encoding it again.

    @param value: URI component
    @type value: six.text_type
    @param safe: Characters which are left unescaped, in addition to those `quote` never escapes
    @type safe: six.text_type
    @rtype: six.text_type
    """
    # This is hackish. `unquote` and `quote` requires `str` in both py27 and py3+.
    if isinstance(value, str):
        # Python 3 branch
        return quote(unquote(value), safe=safe)
    # Python 2 branch - work on UTF-8 encoded bytes and pass `safe` as a native string as well.
    return quote(unquote(value.encode('utf-8')), safe=safe.encode('ascii')).decode('utf-8')


def urinorm(uri):
    """Return normalized URI.

    Normalization is performed according to RFC 3986, section 6 https://tools.ietf.org/html/rfc3986#section-6.
    Supported URIs are URLs and OpenID realm URIs.

    The scheme and the hostname are lowercased, the default port of the scheme is dropped, IDN hostnames are
    converted to their ASCII form, percent encoding of path, query and fragment is normalized, dot segments are
    removed from the path and an empty path is replaced by "/".

    @type uri: six.text_type, six.binary_type deprecated
    @rtype: six.text_type
    @raise ValueError: If URI is invalid.
    """
    uri = string_to_text(uri, "Binary input for urinorm is deprecated. Use text input instead.")

    split_uri = urlsplit(uri)

    # Normalize scheme
    scheme = split_uri.scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError('Not an absolute HTTP or HTTPS URI: {!r}'.format(uri))

    # Normalize netloc
    if not split_uri.netloc:
        raise ValueError('Not an absolute URI: {!r}'.format(uri))

    hostname = split_uri.hostname
    if hostname is None:
        hostname = ''
    else:
        hostname = hostname.lower()
    # The `hostname` attribute comes without the brackets of an IP literal such as "[::1]",
    # so the netloc has to be inspected to recognize one.
    hostinfo = split_uri.netloc.rpartition('@')[2]
    if hostname and hostinfo.startswith('['):
        # Put the brackets back. Without them the colons of the address clash with the port separator
        # and the result can't be parsed again.
        hostname = '[' + hostname + ']'
    else:
        # Unquote percent encoded characters
        hostname = unquote(hostname)
        # Quote IDN domain names
        try:
            # hostname: str --[idna]--> bytes --[utf-8]--> str
            hostname = hostname.encode('idna').decode('utf-8')
        except ValueError as error:
            raise ValueError('Invalid hostname {!r}: {}'.format(hostname, error))
    _check_disallowed_characters(hostname, 'hostname')

    try:
        port = split_uri.port
    except ValueError as error:
        raise ValueError('Invalid port in {!r}: {}'.format(split_uri.netloc, error))
    if port is None:
        port = ''
    elif (scheme == 'http' and port == 80) or (scheme == 'https' and port == 443):
        # Default port of the scheme is omitted
        port = ''

    netloc = hostname
    if port:
        netloc = netloc + ':' + six.text_type(port)
    userinfo_chunks = [i for i in (split_uri.username, split_uri.password) if i is not None]
    if userinfo_chunks:
        userinfo = ':'.join(userinfo_chunks)
        _check_disallowed_characters(userinfo, 'userinfo')
        netloc = userinfo + '@' + netloc

    # Normalize path
    # Unquote and quote - this normalizes the percent encoding. Only "/" is kept unescaped next to the characters
    # `quote` never escapes, so percent encoded slashes are decoded and other delimiters get encoded.
    path = _requote(split_uri.path, '/')
    path = remove_dot_segments(path)
    if not path:
        path = '/'
    _check_disallowed_characters(path, 'path')

    # Normalize query
    # Keep blank values, otherwise parameters such as "a=" would silently disappear from the URI.
    data = parse_qsl(split_uri.query, keep_blank_values=True)
    query = urlencode(data)
    _check_disallowed_characters(query, 'query')

    # Normalize fragment
    # Decode and encode again, so valid escapes such as "%20" don't end up as illegal raw characters.
    # Reserved characters and "~" are left as they are, a literal "%" is encoded as "%25".
    fragment = _requote(split_uri.fragment, RESERVED + '~')
    _check_disallowed_characters(fragment, 'fragment')

    return urlunsplit((scheme, netloc, path, query, fragment))